datasketches 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
- data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
- data/vendor/datasketches-cpp/python/README.md +6 -3
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
- data/vendor/datasketches-cpp/setup.py +5 -3
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
- metadata +43 -34
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -20,103 +20,70 @@
|
|
20
20
|
#ifndef THETA_UNION_HPP_
|
21
21
|
#define THETA_UNION_HPP_
|
22
22
|
|
23
|
-
#include
|
24
|
-
#include <functional>
|
25
|
-
#include <climits>
|
26
|
-
|
23
|
+
#include "serde.hpp"
|
27
24
|
#include "theta_sketch.hpp"
|
25
|
+
#include "theta_union_base.hpp"
|
28
26
|
|
29
27
|
namespace datasketches {
|
30
28
|
|
31
|
-
|
32
|
-
* author Alexander Saydakov
|
33
|
-
* author Lee Rhodes
|
34
|
-
* author Kevin Lang
|
35
|
-
*/
|
36
|
-
|
37
|
-
template<typename A>
|
29
|
+
template<typename Allocator = std::allocator<uint64_t>>
|
38
30
|
class theta_union_alloc {
|
39
31
|
public:
|
40
|
-
|
32
|
+
using Entry = uint64_t;
|
33
|
+
using ExtractKey = trivial_extract_key;
|
34
|
+
using Sketch = theta_sketch_alloc<Allocator>;
|
35
|
+
using CompactSketch = compact_theta_sketch_alloc<Allocator>;
|
36
|
+
using resize_factor = theta_constants::resize_factor;
|
37
|
+
|
38
|
+
struct pass_through_policy {
|
39
|
+
uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
|
40
|
+
unused(incoming_entry);
|
41
|
+
return internal_entry;
|
42
|
+
}
|
43
|
+
};
|
44
|
+
using State = theta_union_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
|
41
45
|
|
42
46
|
// No constructor here. Use builder instead.
|
47
|
+
class builder;
|
43
48
|
|
44
49
|
/**
|
45
50
|
* This method is to update the union with a given sketch
|
46
51
|
* @param sketch to update the union with
|
47
52
|
*/
|
48
|
-
|
53
|
+
template<typename FwdSketch>
|
54
|
+
void update(FwdSketch&& sketch);
|
49
55
|
|
50
56
|
/**
|
51
57
|
* This method produces a copy of the current state of the union as a compact sketch.
|
52
58
|
* @param ordered optional flag to specify if ordered sketch should be produced
|
53
59
|
* @return the result of the union
|
54
60
|
*/
|
55
|
-
|
61
|
+
CompactSketch get_result(bool ordered = true) const;
|
56
62
|
|
57
63
|
private:
|
58
|
-
|
59
|
-
uint64_t theta_;
|
60
|
-
update_theta_sketch_alloc<A> state_;
|
64
|
+
State state_;
|
61
65
|
|
62
66
|
// for builder
|
63
|
-
theta_union_alloc(uint64_t theta,
|
67
|
+
theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Allocator& allocator);
|
64
68
|
};
|
65
69
|
|
66
|
-
// builder
|
67
|
-
|
68
70
|
template<typename A>
|
69
|
-
class theta_union_alloc<A>::builder {
|
71
|
+
class theta_union_alloc<A>::builder: public theta_base_builder<builder, A> {
|
70
72
|
public:
|
71
|
-
|
72
|
-
|
73
|
-
/**
|
74
|
-
* Set log2(k), where k is a nominal number of entries in the sketch
|
75
|
-
* @param lg_k base 2 logarithm of nominal number of entries
|
76
|
-
* @return this builder
|
77
|
-
*/
|
78
|
-
builder& set_lg_k(uint8_t lg_k);
|
79
|
-
|
80
|
-
/**
|
81
|
-
* Set resize factor for the internal hash table (defaults to 8)
|
82
|
-
* @param rf resize factor
|
83
|
-
* @return this builder
|
84
|
-
*/
|
85
|
-
builder& set_resize_factor(resize_factor rf);
|
86
|
-
|
87
|
-
/**
|
88
|
-
* Set sampling probability (initial theta). The default is 1, so the sketch retains
|
89
|
-
* all entries until it reaches the limit, at which point it goes into the estimation mode
|
90
|
-
* and reduces the effective sampling probability (theta) as necessary.
|
91
|
-
* @param p sampling probability
|
92
|
-
* @return this builder
|
93
|
-
*/
|
94
|
-
builder& set_p(float p);
|
95
|
-
|
96
|
-
/**
|
97
|
-
* Set the seed for the hash function. Should be used carefully if needed.
|
98
|
-
* Sketches produced with different seed are not compatible
|
99
|
-
* and cannot be mixed in set operations.
|
100
|
-
* @param seed hash seed
|
101
|
-
* @return this builder
|
102
|
-
*/
|
103
|
-
builder& set_seed(uint64_t seed);
|
73
|
+
builder(const A& allocator = A());
|
104
74
|
|
105
75
|
/**
|
106
76
|
* This is to create an instance of the union with predefined parameters.
|
107
|
-
* @return
|
77
|
+
* @return an instance of the union
|
108
78
|
*/
|
109
79
|
theta_union_alloc<A> build() const;
|
110
|
-
|
111
|
-
private:
|
112
|
-
typename update_theta_sketch_alloc<A>::builder sketch_builder;
|
113
80
|
};
|
114
81
|
|
115
82
|
// alias with default allocator for convenience
|
116
|
-
|
83
|
+
using theta_union = theta_union_alloc<std::allocator<uint64_t>>;
|
117
84
|
|
118
85
|
} /* namespace datasketches */
|
119
86
|
|
120
87
|
#include "theta_union_impl.hpp"
|
121
88
|
|
122
|
-
#
|
89
|
+
#endif
|
@@ -17,6 +17,9 @@
|
|
17
17
|
* under the License.
|
18
18
|
*/
|
19
19
|
|
20
|
+
#ifndef THETA_UNION_BASE_IMPL_HPP_
|
21
|
+
#define THETA_UNION_BASE_IMPL_HPP_
|
22
|
+
|
20
23
|
#include <algorithm>
|
21
24
|
|
22
25
|
#include "conditional_forward.hpp"
|
@@ -82,3 +85,5 @@ const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy() const {
|
|
82
85
|
}
|
83
86
|
|
84
87
|
} /* namespace datasketches */
|
88
|
+
|
89
|
+
#endif
|
@@ -22,86 +22,30 @@
|
|
22
22
|
|
23
23
|
namespace datasketches {
|
24
24
|
|
25
|
-
/*
|
26
|
-
* author Alexander Saydakov
|
27
|
-
* author Lee Rhodes
|
28
|
-
* author Kevin Lang
|
29
|
-
*/
|
30
|
-
|
31
|
-
template<typename A>
|
32
|
-
theta_union_alloc<A>::theta_union_alloc(uint64_t theta, update_theta_sketch_alloc<A>&& state):
|
33
|
-
is_empty_(true), theta_(theta), state_(std::move(state)) {}
|
34
|
-
|
35
|
-
template<typename A>
|
36
|
-
void theta_union_alloc<A>::update(const theta_sketch_alloc<A>& sketch) {
|
37
|
-
if (sketch.is_empty()) return;
|
38
|
-
if (sketch.get_seed_hash() != state_.get_seed_hash()) throw std::invalid_argument("seed hash mismatch");
|
39
|
-
is_empty_ = false;
|
40
|
-
if (sketch.get_theta64() < theta_) theta_ = sketch.get_theta64();
|
41
|
-
if (sketch.is_ordered()) {
|
42
|
-
for (auto hash: sketch) {
|
43
|
-
if (hash >= theta_) break; // early stop
|
44
|
-
state_.internal_update(hash);
|
45
|
-
}
|
46
|
-
} else {
|
47
|
-
for (auto hash: sketch) if (hash < theta_) state_.internal_update(hash);
|
48
|
-
}
|
49
|
-
if (state_.get_theta64() < theta_) theta_ = state_.get_theta64();
|
50
|
-
}
|
51
|
-
|
52
25
|
template<typename A>
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
if (theta_ >= state_.theta_ && state_.get_num_retained() <= nom_num_keys) return state_.compact(ordered);
|
57
|
-
uint64_t theta = std::min(theta_, state_.get_theta64());
|
58
|
-
vector_u64<A> keys(state_.get_num_retained());
|
59
|
-
uint32_t num_keys = 0;
|
60
|
-
for (auto key: state_) {
|
61
|
-
if (key < theta) keys[num_keys++] = key;
|
62
|
-
}
|
63
|
-
if (num_keys > nom_num_keys) {
|
64
|
-
std::nth_element(keys.begin(), keys.begin() + nom_num_keys, keys.begin() + num_keys);
|
65
|
-
theta = keys[nom_num_keys];
|
66
|
-
num_keys = nom_num_keys;
|
67
|
-
}
|
68
|
-
if (num_keys != state_.get_num_retained()) {
|
69
|
-
keys.resize(num_keys);
|
70
|
-
}
|
71
|
-
if (ordered) std::sort(keys.begin(), keys.end());
|
72
|
-
return compact_theta_sketch_alloc<A>(false, theta, std::move(keys), state_.get_seed_hash(), ordered);
|
73
|
-
}
|
74
|
-
|
75
|
-
// builder
|
26
|
+
theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator):
|
27
|
+
state_(lg_cur_size, lg_nom_size, rf, theta, seed, pass_through_policy(), allocator)
|
28
|
+
{}
|
76
29
|
|
77
30
|
template<typename A>
|
78
|
-
typename
|
79
|
-
|
80
|
-
|
31
|
+
template<typename SS>
|
32
|
+
void theta_union_alloc<A>::update(SS&& sketch) {
|
33
|
+
state_.update(std::forward<SS>(sketch));
|
81
34
|
}
|
82
35
|
|
83
36
|
template<typename A>
|
84
|
-
|
85
|
-
|
86
|
-
return *this;
|
37
|
+
auto theta_union_alloc<A>::get_result(bool ordered) const -> CompactSketch {
|
38
|
+
return state_.get_result(ordered);
|
87
39
|
}
|
88
40
|
|
89
41
|
template<typename A>
|
90
|
-
|
91
|
-
sketch_builder.set_p(p);
|
92
|
-
return *this;
|
93
|
-
}
|
94
|
-
|
95
|
-
template<typename A>
|
96
|
-
typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_seed(uint64_t seed) {
|
97
|
-
sketch_builder.set_seed(seed);
|
98
|
-
return *this;
|
99
|
-
}
|
42
|
+
theta_union_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
|
100
43
|
|
101
44
|
template<typename A>
|
102
|
-
|
103
|
-
|
104
|
-
|
45
|
+
auto theta_union_alloc<A>::builder::build() const -> theta_union_alloc {
|
46
|
+
return theta_union_alloc(
|
47
|
+
this->starting_sub_multiple(this->lg_k_ + 1, this->MIN_LG_K, static_cast<uint8_t>(this->rf_)),
|
48
|
+
this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
|
105
49
|
}
|
106
50
|
|
107
51
|
} /* namespace datasketches */
|
@@ -34,7 +34,7 @@ namespace datasketches {
|
|
34
34
|
template<
|
35
35
|
typename Entry,
|
36
36
|
typename ExtractKey,
|
37
|
-
typename Allocator
|
37
|
+
typename Allocator
|
38
38
|
>
|
39
39
|
struct theta_update_sketch_base {
|
40
40
|
using resize_factor = theta_constants::resize_factor;
|
@@ -147,7 +147,7 @@ protected:
|
|
147
147
|
static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf);
|
148
148
|
};
|
149
149
|
|
150
|
-
// key
|
150
|
+
// key extractor
|
151
151
|
|
152
152
|
struct trivial_extract_key {
|
153
153
|
template<typename T>
|
@@ -156,17 +156,7 @@ struct trivial_extract_key {
|
|
156
156
|
}
|
157
157
|
};
|
158
158
|
|
159
|
-
|
160
|
-
struct pair_extract_key {
|
161
|
-
K& operator()(std::pair<K, V>& entry) const {
|
162
|
-
return entry.first;
|
163
|
-
}
|
164
|
-
const K& operator()(const std::pair<K, V>& entry) const {
|
165
|
-
return entry.first;
|
166
|
-
}
|
167
|
-
};
|
168
|
-
|
169
|
-
// not zero
|
159
|
+
// key not zero
|
170
160
|
|
171
161
|
template<typename Entry, typename ExtractKey>
|
172
162
|
class key_not_zero {
|
@@ -195,12 +185,6 @@ static inline uint64_t compute_hash(const void* data, size_t length, uint64_t se
|
|
195
185
|
return (hashes.h1 >> 1); // Java implementation does unsigned shift >>> to make values positive
|
196
186
|
}
|
197
187
|
|
198
|
-
static inline uint16_t compute_seed_hash(uint64_t seed) {
|
199
|
-
HashState hashes;
|
200
|
-
MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
|
201
|
-
return hashes.h1;
|
202
|
-
}
|
203
|
-
|
204
188
|
// iterators
|
205
189
|
|
206
190
|
template<typename Entry, typename ExtractKey>
|
@@ -17,6 +17,9 @@
|
|
17
17
|
* under the License.
|
18
18
|
*/
|
19
19
|
|
20
|
+
#ifndef THETA_UPDATE_SKETCH_BASE_IMPL_HPP_
|
21
|
+
#define THETA_UPDATE_SKETCH_BASE_IMPL_HPP_
|
22
|
+
|
20
23
|
#include <iostream>
|
21
24
|
#include <sstream>
|
22
25
|
#include <algorithm>
|
@@ -69,7 +72,7 @@ entries_(nullptr)
|
|
69
72
|
|
70
73
|
template<typename EN, typename EK, typename A>
|
71
74
|
theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(theta_update_sketch_base&& other) noexcept:
|
72
|
-
allocator_(other.allocator_),
|
75
|
+
allocator_(std::move(other.allocator_)),
|
73
76
|
is_empty_(other.is_empty_),
|
74
77
|
lg_cur_size_(other.lg_cur_size_),
|
75
78
|
lg_nom_size_(other.lg_nom_size_),
|
@@ -387,3 +390,5 @@ auto theta_const_iterator<Entry, ExtractKey>::operator*() const -> const Entry&
|
|
387
390
|
}
|
388
391
|
|
389
392
|
} /* namespace datasketches */
|
393
|
+
|
394
|
+
#endif
|
@@ -20,11 +20,10 @@
|
|
20
20
|
#include <iostream>
|
21
21
|
|
22
22
|
#include <catch.hpp>
|
23
|
-
#include <jaccard_similarity.hpp>
|
24
23
|
|
25
|
-
|
24
|
+
#include "theta_jaccard_similarity.hpp"
|
26
25
|
|
27
|
-
|
26
|
+
namespace datasketches {
|
28
27
|
|
29
28
|
TEST_CASE("theta jaccard: empty", "[theta_sketch]") {
|
30
29
|
auto sk_a = update_theta_sketch::builder().build();
|
@@ -17,10 +17,10 @@
|
|
17
17
|
* under the License.
|
18
18
|
*/
|
19
19
|
|
20
|
-
#include <catch.hpp>
|
21
20
|
#include <fstream>
|
22
21
|
#include <sstream>
|
23
22
|
|
23
|
+
#include <catch.hpp>
|
24
24
|
#include <theta_sketch.hpp>
|
25
25
|
|
26
26
|
namespace datasketches {
|
@@ -134,75 +134,7 @@ TEST_CASE("theta sketch: estimation", "[theta_sketch]") {
|
|
134
134
|
REQUIRE(compact_sketch.get_upper_bound(1) > n);
|
135
135
|
}
|
136
136
|
|
137
|
-
TEST_CASE("theta sketch: deserialize update empty from java as base", "[theta_sketch]") {
|
138
|
-
std::ifstream is;
|
139
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
140
|
-
is.open(inputPath + "theta_update_empty_from_java.sk", std::ios::binary);
|
141
|
-
auto sketchptr = theta_sketch::deserialize(is);
|
142
|
-
REQUIRE(sketchptr->is_empty());
|
143
|
-
REQUIRE_FALSE(sketchptr->is_estimation_mode());
|
144
|
-
REQUIRE(sketchptr->get_num_retained() == 0);
|
145
|
-
REQUIRE(sketchptr->get_theta() == 1.0);
|
146
|
-
REQUIRE(sketchptr->get_estimate() == 0.0);
|
147
|
-
REQUIRE(sketchptr->get_lower_bound(1) == 0.0);
|
148
|
-
REQUIRE(sketchptr->get_upper_bound(1) == 0.0);
|
149
|
-
}
|
150
|
-
|
151
|
-
TEST_CASE("theta sketch: deserialize update empty from java as subclass", "[theta_sketch]") {
|
152
|
-
std::ifstream is;
|
153
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
154
|
-
is.open(inputPath + "theta_update_empty_from_java.sk", std::ios::binary);
|
155
|
-
auto sketch = update_theta_sketch::deserialize(is);
|
156
|
-
REQUIRE(sketch.is_empty());
|
157
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
158
|
-
REQUIRE(sketch.get_num_retained() == 0);
|
159
|
-
REQUIRE(sketch.get_theta() == 1.0);
|
160
|
-
REQUIRE(sketch.get_estimate() == 0.0);
|
161
|
-
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
162
|
-
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
163
|
-
}
|
164
|
-
|
165
|
-
TEST_CASE("theta sketch: deserialize update estimation from java as base", "[theta_sketch]") {
|
166
|
-
std::ifstream is;
|
167
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
168
|
-
is.open(inputPath + "theta_update_estimation_from_java.sk", std::ios::binary);
|
169
|
-
auto sketchptr = theta_sketch::deserialize(is);
|
170
|
-
REQUIRE_FALSE(sketchptr->is_empty());
|
171
|
-
REQUIRE(sketchptr->is_estimation_mode());
|
172
|
-
REQUIRE(sketchptr->get_num_retained() == 5324);
|
173
|
-
REQUIRE(sketchptr->get_estimate() == Approx(10000.0).margin(10000 * 0.01));
|
174
|
-
REQUIRE(sketchptr->get_lower_bound(1) < 10000);
|
175
|
-
REQUIRE(sketchptr->get_upper_bound(1) > 10000);
|
176
|
-
}
|
177
|
-
|
178
|
-
TEST_CASE("theta sketch: deserialize update estimation from java as subclass", "[theta_sketch]") {
|
179
|
-
std::ifstream is;
|
180
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
181
|
-
is.open(inputPath + "theta_update_estimation_from_java.sk", std::ios::binary);
|
182
|
-
auto sketch = update_theta_sketch::deserialize(is);
|
183
|
-
REQUIRE_FALSE(sketch.is_empty());
|
184
|
-
REQUIRE(sketch.is_estimation_mode());
|
185
|
-
REQUIRE(sketch.get_num_retained() == 5324);
|
186
|
-
REQUIRE(sketch.get_estimate() == Approx(10000.0).margin(10000 * 0.01));
|
187
|
-
REQUIRE(sketch.get_lower_bound(1) < 10000);
|
188
|
-
REQUIRE(sketch.get_upper_bound(1) > 10000);
|
189
|
-
}
|
190
|
-
|
191
|
-
TEST_CASE("theta sketch: deserialize compact empty from java as base", "[theta_sketch]") {
|
192
|
-
std::ifstream is;
|
193
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
194
|
-
is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary);
|
195
|
-
auto sketchptr = theta_sketch::deserialize(is);
|
196
|
-
REQUIRE(sketchptr->is_empty());
|
197
|
-
REQUIRE_FALSE(sketchptr->is_estimation_mode());
|
198
|
-
REQUIRE(sketchptr->get_num_retained() == 0);
|
199
|
-
REQUIRE(sketchptr->get_theta() == 1.0);
|
200
|
-
REQUIRE(sketchptr->get_estimate() == 0.0);
|
201
|
-
REQUIRE(sketchptr->get_lower_bound(1) == 0.0);
|
202
|
-
REQUIRE(sketchptr->get_upper_bound(1) == 0.0);
|
203
|
-
}
|
204
|
-
|
205
|
-
TEST_CASE("theta sketch: deserialize compact empty from java as subclass", "[theta_sketch]") {
|
137
|
+
TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]") {
|
206
138
|
std::ifstream is;
|
207
139
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
208
140
|
is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary);
|
@@ -216,21 +148,7 @@ TEST_CASE("theta sketch: deserialize compact empty from java as subclass", "[the
|
|
216
148
|
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
217
149
|
}
|
218
150
|
|
219
|
-
TEST_CASE("theta sketch: deserialize single item from java as base", "[theta_sketch]") {
|
220
|
-
std::ifstream is;
|
221
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
222
|
-
is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary);
|
223
|
-
auto sketchptr = theta_sketch::deserialize(is);
|
224
|
-
REQUIRE_FALSE(sketchptr->is_empty());
|
225
|
-
REQUIRE_FALSE(sketchptr->is_estimation_mode());
|
226
|
-
REQUIRE(sketchptr->get_num_retained() == 1);
|
227
|
-
REQUIRE(sketchptr->get_theta() == 1.0);
|
228
|
-
REQUIRE(sketchptr->get_estimate() == 1.0);
|
229
|
-
REQUIRE(sketchptr->get_lower_bound(1) == 1.0);
|
230
|
-
REQUIRE(sketchptr->get_upper_bound(1) == 1.0);
|
231
|
-
}
|
232
|
-
|
233
|
-
TEST_CASE("theta sketch: deserialize single item from java as subclass", "[theta_sketch]") {
|
151
|
+
TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
|
234
152
|
std::ifstream is;
|
235
153
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
236
154
|
is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary);
|
@@ -244,55 +162,21 @@ TEST_CASE("theta sketch: deserialize single item from java as subclass", "[theta
|
|
244
162
|
REQUIRE(sketch.get_upper_bound(1) == 1.0);
|
245
163
|
}
|
246
164
|
|
247
|
-
TEST_CASE("theta sketch: deserialize compact estimation from java as base", "[theta_sketch]") {
|
248
|
-
std::ifstream is;
|
249
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
250
|
-
is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary);
|
251
|
-
auto sketchptr = theta_sketch::deserialize(is);
|
252
|
-
REQUIRE_FALSE(sketchptr->is_empty());
|
253
|
-
REQUIRE(sketchptr->is_estimation_mode());
|
254
|
-
REQUIRE(sketchptr->is_ordered());
|
255
|
-
REQUIRE(sketchptr->get_num_retained() == 4342);
|
256
|
-
REQUIRE(sketchptr->get_theta() == Approx(0.531700444213199).margin(1e-10));
|
257
|
-
REQUIRE(sketchptr->get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
258
|
-
REQUIRE(sketchptr->get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
259
|
-
REQUIRE(sketchptr->get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
260
|
-
|
261
|
-
// the same construction process in Java must have produced exactly the same sketch
|
262
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
263
|
-
const int n = 8192;
|
264
|
-
for (int i = 0; i < n; i++) update_sketch.update(i);
|
265
|
-
REQUIRE(sketchptr->get_num_retained() == update_sketch.get_num_retained());
|
266
|
-
REQUIRE(sketchptr->get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
267
|
-
REQUIRE(sketchptr->get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
268
|
-
REQUIRE(sketchptr->get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
269
|
-
REQUIRE(sketchptr->get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
270
|
-
REQUIRE(sketchptr->get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
271
|
-
REQUIRE(sketchptr->get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
272
|
-
REQUIRE(sketchptr->get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
273
|
-
REQUIRE(sketchptr->get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
274
|
-
compact_theta_sketch compact_sketch = update_sketch.compact();
|
275
|
-
// the sketches are ordered, so the iteration sequence must match exactly
|
276
|
-
auto iter = sketchptr->begin();
|
277
|
-
for (auto key: compact_sketch) {
|
278
|
-
REQUIRE(*iter == key);
|
279
|
-
++iter;
|
280
|
-
}
|
281
|
-
}
|
282
|
-
|
283
|
-
TEST_CASE("theta sketch: deserialize compact estimation from java as subclass", "[theta_sketch]") {
|
165
|
+
TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") {
|
284
166
|
std::ifstream is;
|
285
167
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
286
168
|
is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary);
|
287
169
|
auto sketch = compact_theta_sketch::deserialize(is);
|
288
170
|
REQUIRE_FALSE(sketch.is_empty());
|
289
171
|
REQUIRE(sketch.is_estimation_mode());
|
172
|
+
REQUIRE(sketch.is_ordered());
|
290
173
|
REQUIRE(sketch.get_num_retained() == 4342);
|
291
174
|
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
292
175
|
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
293
176
|
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
294
177
|
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
295
178
|
|
179
|
+
// the same construction process in Java must have produced exactly the same sketch
|
296
180
|
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
297
181
|
const int n = 8192;
|
298
182
|
for (int i = 0; i < n; i++) update_sketch.update(i);
|
@@ -305,132 +189,51 @@ TEST_CASE("theta sketch: deserialize compact estimation from java as subclass",
|
|
305
189
|
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
306
190
|
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
307
191
|
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
192
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
193
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
194
|
+
auto iter = sketch.begin();
|
195
|
+
for (const auto& key: compact_sketch) {
|
196
|
+
REQUIRE(*iter == key);
|
197
|
+
++iter;
|
198
|
+
}
|
308
199
|
}
|
309
200
|
|
310
|
-
TEST_CASE("theta sketch: serialize deserialize stream and bytes
|
201
|
+
TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[theta_sketch]") {
|
311
202
|
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
312
203
|
const int n = 8192;
|
313
204
|
for (int i = 0; i < n; i++) update_sketch.update(i);
|
314
205
|
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
REQUIRE(bytes.
|
321
|
-
for (size_t i = 0; i < bytes.size(); ++i) {
|
322
|
-
REQUIRE(((char*)bytes.data())[i] == (char)s.get());
|
323
|
-
}
|
324
|
-
|
325
|
-
// deserialize as base class
|
326
|
-
{
|
327
|
-
s.seekg(0); // rewind
|
328
|
-
auto deserialized_sketch_ptr1 = theta_sketch::deserialize(s);
|
329
|
-
auto deserialized_sketch_ptr2 = theta_sketch::deserialize(bytes.data(), bytes.size());
|
330
|
-
REQUIRE(bytes.size() == static_cast<size_t>(s.tellg()));
|
331
|
-
REQUIRE(deserialized_sketch_ptr2->is_empty() == deserialized_sketch_ptr1->is_empty());
|
332
|
-
REQUIRE(deserialized_sketch_ptr2->is_ordered() == deserialized_sketch_ptr1->is_ordered());
|
333
|
-
REQUIRE(deserialized_sketch_ptr2->get_num_retained() == deserialized_sketch_ptr1->get_num_retained());
|
334
|
-
REQUIRE(deserialized_sketch_ptr2->get_theta() == deserialized_sketch_ptr1->get_theta());
|
335
|
-
REQUIRE(deserialized_sketch_ptr2->get_estimate() == deserialized_sketch_ptr1->get_estimate());
|
336
|
-
REQUIRE(deserialized_sketch_ptr2->get_lower_bound(1) == deserialized_sketch_ptr1->get_lower_bound(1));
|
337
|
-
REQUIRE(deserialized_sketch_ptr2->get_upper_bound(1) == deserialized_sketch_ptr1->get_upper_bound(1));
|
338
|
-
// hash tables must be identical since they are restored from dumps, and iteration is deterministic
|
339
|
-
auto iter = deserialized_sketch_ptr1->begin();
|
340
|
-
for (auto key: *deserialized_sketch_ptr2) {
|
341
|
-
REQUIRE(*iter == key);
|
342
|
-
++iter;
|
343
|
-
}
|
344
|
-
}
|
345
|
-
|
346
|
-
// deserialize as subclass
|
347
|
-
{
|
348
|
-
s.seekg(0); // rewind
|
349
|
-
update_theta_sketch deserialized_sketch1 = update_theta_sketch::deserialize(s);
|
350
|
-
update_theta_sketch deserialized_sketch2 = update_theta_sketch::deserialize(bytes.data(), bytes.size());
|
351
|
-
REQUIRE(bytes.size() == static_cast<size_t>(s.tellg()));
|
352
|
-
REQUIRE(deserialized_sketch2.is_empty() == deserialized_sketch1.is_empty());
|
353
|
-
REQUIRE(deserialized_sketch2.is_ordered() == deserialized_sketch1.is_ordered());
|
354
|
-
REQUIRE(deserialized_sketch2.get_num_retained() == deserialized_sketch1.get_num_retained());
|
355
|
-
REQUIRE(deserialized_sketch2.get_theta() == deserialized_sketch1.get_theta());
|
356
|
-
REQUIRE(deserialized_sketch2.get_estimate() == deserialized_sketch1.get_estimate());
|
357
|
-
REQUIRE(deserialized_sketch2.get_lower_bound(1) == deserialized_sketch1.get_lower_bound(1));
|
358
|
-
REQUIRE(deserialized_sketch2.get_upper_bound(1) == deserialized_sketch1.get_upper_bound(1));
|
359
|
-
// hash tables must be identical since they are restored from dumps, and iteration is deterministic
|
360
|
-
auto iter = deserialized_sketch1.begin();
|
361
|
-
for (auto key: deserialized_sketch2) {
|
362
|
-
REQUIRE(*iter == key);
|
363
|
-
++iter;
|
364
|
-
}
|
365
|
-
}
|
206
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
207
|
+
update_sketch.compact().serialize(s);
|
208
|
+
auto bytes = update_sketch.compact().serialize();
|
209
|
+
REQUIRE(bytes.size() == static_cast<size_t>(s.tellp()));
|
210
|
+
for (size_t i = 0; i < bytes.size(); ++i) {
|
211
|
+
REQUIRE(((char*)bytes.data())[i] == (char)s.get());
|
366
212
|
}
|
367
213
|
|
368
|
-
//
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
REQUIRE(deserialized_sketch_ptr2->is_empty() == deserialized_sketch_ptr1->is_empty());
|
385
|
-
REQUIRE(deserialized_sketch_ptr2->is_ordered() == deserialized_sketch_ptr1->is_ordered());
|
386
|
-
REQUIRE(deserialized_sketch_ptr2->get_num_retained() == deserialized_sketch_ptr1->get_num_retained());
|
387
|
-
REQUIRE(deserialized_sketch_ptr2->get_theta() == deserialized_sketch_ptr1->get_theta());
|
388
|
-
REQUIRE(deserialized_sketch_ptr2->get_estimate() == deserialized_sketch_ptr1->get_estimate());
|
389
|
-
REQUIRE(deserialized_sketch_ptr2->get_lower_bound(1) == deserialized_sketch_ptr1->get_lower_bound(1));
|
390
|
-
REQUIRE(deserialized_sketch_ptr2->get_upper_bound(1) == deserialized_sketch_ptr1->get_upper_bound(1));
|
391
|
-
// the sketches are ordered, so the iteration sequence must match exactly
|
392
|
-
auto iter = deserialized_sketch_ptr1->begin();
|
393
|
-
for (auto key: *deserialized_sketch_ptr2) {
|
394
|
-
REQUIRE(*iter == key);
|
395
|
-
++iter;
|
396
|
-
}
|
397
|
-
}
|
398
|
-
|
399
|
-
// deserialize as subclass
|
400
|
-
{
|
401
|
-
s.seekg(0); // rewind
|
402
|
-
compact_theta_sketch deserialized_sketch1 = compact_theta_sketch::deserialize(s);
|
403
|
-
compact_theta_sketch deserialized_sketch2 = compact_theta_sketch::deserialize(bytes.data(), bytes.size());
|
404
|
-
REQUIRE(bytes.size() == static_cast<size_t>(s.tellg()));
|
405
|
-
REQUIRE(deserialized_sketch2.is_empty() == deserialized_sketch1.is_empty());
|
406
|
-
REQUIRE(deserialized_sketch2.is_ordered() == deserialized_sketch1.is_ordered());
|
407
|
-
REQUIRE(deserialized_sketch2.get_num_retained() == deserialized_sketch1.get_num_retained());
|
408
|
-
REQUIRE(deserialized_sketch2.get_theta() == deserialized_sketch1.get_theta());
|
409
|
-
REQUIRE(deserialized_sketch2.get_estimate() == deserialized_sketch1.get_estimate());
|
410
|
-
REQUIRE(deserialized_sketch2.get_lower_bound(1) == deserialized_sketch1.get_lower_bound(1));
|
411
|
-
REQUIRE(deserialized_sketch2.get_upper_bound(1) == deserialized_sketch1.get_upper_bound(1));
|
412
|
-
// the sketches are ordered, so the iteration sequence must match exactly
|
413
|
-
auto iter = deserialized_sketch1.begin();
|
414
|
-
for (auto key: deserialized_sketch2) {
|
415
|
-
REQUIRE(*iter == key);
|
416
|
-
++iter;
|
417
|
-
}
|
418
|
-
}
|
214
|
+
s.seekg(0); // rewind
|
215
|
+
compact_theta_sketch deserialized_sketch1 = compact_theta_sketch::deserialize(s);
|
216
|
+
compact_theta_sketch deserialized_sketch2 = compact_theta_sketch::deserialize(bytes.data(), bytes.size());
|
217
|
+
REQUIRE(bytes.size() == static_cast<size_t>(s.tellg()));
|
218
|
+
REQUIRE(deserialized_sketch2.is_empty() == deserialized_sketch1.is_empty());
|
219
|
+
REQUIRE(deserialized_sketch2.is_ordered() == deserialized_sketch1.is_ordered());
|
220
|
+
REQUIRE(deserialized_sketch2.get_num_retained() == deserialized_sketch1.get_num_retained());
|
221
|
+
REQUIRE(deserialized_sketch2.get_theta() == deserialized_sketch1.get_theta());
|
222
|
+
REQUIRE(deserialized_sketch2.get_estimate() == deserialized_sketch1.get_estimate());
|
223
|
+
REQUIRE(deserialized_sketch2.get_lower_bound(1) == deserialized_sketch1.get_lower_bound(1));
|
224
|
+
REQUIRE(deserialized_sketch2.get_upper_bound(1) == deserialized_sketch1.get_upper_bound(1));
|
225
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
226
|
+
auto iter = deserialized_sketch1.begin();
|
227
|
+
for (auto key: deserialized_sketch2) {
|
228
|
+
REQUIRE(*iter == key);
|
229
|
+
++iter;
|
419
230
|
}
|
420
231
|
}
|
421
232
|
|
422
|
-
TEST_CASE("theta sketch: deserialize update single item buffer overrun", "[theta_sketch]") {
|
423
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
424
|
-
update_sketch.update(1);
|
425
|
-
theta_sketch::vector_bytes bytes = update_sketch.serialize();
|
426
|
-
REQUIRE_THROWS_AS(update_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
|
427
|
-
REQUIRE_THROWS_AS(update_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
428
|
-
}
|
429
|
-
|
430
233
|
TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[theta_sketch]") {
|
431
234
|
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
432
235
|
update_sketch.update(1);
|
433
|
-
|
236
|
+
auto bytes = update_sketch.compact().serialize();
|
434
237
|
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
|
435
238
|
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
436
239
|
}
|