datasketches 0.2.0 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +7 -7
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
- data/vendor/datasketches-cpp/python/README.md +50 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
- metadata +18 -7
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -26,68 +26,77 @@
|
|
26
26
|
#include "serde.hpp"
|
27
27
|
#include "binomial_bounds.hpp"
|
28
28
|
#include "theta_helpers.hpp"
|
29
|
+
#include "compact_theta_sketch_parser.hpp"
|
29
30
|
|
30
31
|
namespace datasketches {
|
31
32
|
|
32
33
|
template<typename A>
|
33
|
-
bool
|
34
|
+
bool base_theta_sketch_alloc<A>::is_estimation_mode() const {
|
34
35
|
return get_theta64() < theta_constants::MAX_THETA && !is_empty();
|
35
36
|
}
|
36
37
|
|
37
38
|
template<typename A>
|
38
|
-
double
|
39
|
+
double base_theta_sketch_alloc<A>::get_theta() const {
|
39
40
|
return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
|
40
41
|
}
|
41
42
|
|
42
43
|
template<typename A>
|
43
|
-
double
|
44
|
+
double base_theta_sketch_alloc<A>::get_estimate() const {
|
44
45
|
return get_num_retained() / get_theta();
|
45
46
|
}
|
46
47
|
|
47
48
|
template<typename A>
|
48
|
-
double
|
49
|
+
double base_theta_sketch_alloc<A>::get_lower_bound(uint8_t num_std_devs) const {
|
49
50
|
if (!is_estimation_mode()) return get_num_retained();
|
50
51
|
return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
|
51
52
|
}
|
52
53
|
|
53
54
|
template<typename A>
|
54
|
-
double
|
55
|
+
double base_theta_sketch_alloc<A>::get_upper_bound(uint8_t num_std_devs) const {
|
55
56
|
if (!is_estimation_mode()) return get_num_retained();
|
56
57
|
return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
|
57
58
|
}
|
58
59
|
|
59
60
|
template<typename A>
|
60
|
-
string<A>
|
61
|
-
|
61
|
+
string<A> base_theta_sketch_alloc<A>::to_string(bool print_details) const {
|
62
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
63
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
64
|
+
std::ostringstream os;
|
62
65
|
os << "### Theta sketch summary:" << std::endl;
|
63
|
-
os << " num retained entries : " << get_num_retained() << std::endl;
|
64
|
-
os << " seed hash : " << get_seed_hash() << std::endl;
|
65
|
-
os << " empty? : " << (is_empty() ? "true" : "false") << std::endl;
|
66
|
-
os << " ordered? : " << (is_ordered() ? "true" : "false") << std::endl;
|
67
|
-
os << " estimation mode? : " << (is_estimation_mode() ? "true" : "false") << std::endl;
|
68
|
-
os << " theta (fraction) : " << get_theta() << std::endl;
|
69
|
-
os << " theta (raw 64-bit) : " << get_theta64() << std::endl;
|
66
|
+
os << " num retained entries : " << this->get_num_retained() << std::endl;
|
67
|
+
os << " seed hash : " << this->get_seed_hash() << std::endl;
|
68
|
+
os << " empty? : " << (this->is_empty() ? "true" : "false") << std::endl;
|
69
|
+
os << " ordered? : " << (this->is_ordered() ? "true" : "false") << std::endl;
|
70
|
+
os << " estimation mode? : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
|
71
|
+
os << " theta (fraction) : " << this->get_theta() << std::endl;
|
72
|
+
os << " theta (raw 64-bit) : " << this->get_theta64() << std::endl;
|
70
73
|
os << " estimate : " << this->get_estimate() << std::endl;
|
71
74
|
os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
|
72
75
|
os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
|
73
76
|
print_specifics(os);
|
74
77
|
os << "### End sketch summary" << std::endl;
|
75
|
-
if (
|
78
|
+
if (print_details) {
|
79
|
+
print_items(os);
|
80
|
+
}
|
81
|
+
return string<A>(os.str().c_str(), this->get_allocator());
|
82
|
+
}
|
83
|
+
|
84
|
+
template<typename A>
|
85
|
+
void theta_sketch_alloc<A>::print_items(std::ostringstream& os) const {
|
76
86
|
os << "### Retained entries" << std::endl;
|
77
87
|
for (const auto& hash: *this) {
|
78
88
|
os << hash << std::endl;
|
79
89
|
}
|
80
90
|
os << "### End retained entries" << std::endl;
|
81
|
-
}
|
82
|
-
return os.str();
|
83
91
|
}
|
84
92
|
|
93
|
+
|
85
94
|
// update sketch
|
86
95
|
|
87
96
|
template<typename A>
|
88
97
|
update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
|
89
|
-
uint64_t theta, uint64_t seed, const A& allocator):
|
90
|
-
table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
|
98
|
+
float p, uint64_t theta, uint64_t seed, const A& allocator):
|
99
|
+
table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
|
91
100
|
{}
|
92
101
|
|
93
102
|
template<typename A>
|
@@ -102,12 +111,12 @@ bool update_theta_sketch_alloc<A>::is_empty() const {
|
|
102
111
|
|
103
112
|
template<typename A>
|
104
113
|
bool update_theta_sketch_alloc<A>::is_ordered() const {
|
105
|
-
return false;
|
114
|
+
return table_.num_entries_ > 1 ? false : true;
|
106
115
|
}
|
107
116
|
|
108
117
|
template<typename A>
|
109
118
|
uint64_t update_theta_sketch_alloc<A>::get_theta64() const {
|
110
|
-
return table_.theta_;
|
119
|
+
return is_empty() ? theta_constants::MAX_THETA : table_.theta_;
|
111
120
|
}
|
112
121
|
|
113
122
|
template<typename A>
|
@@ -201,6 +210,11 @@ void update_theta_sketch_alloc<A>::trim() {
|
|
201
210
|
table_.trim();
|
202
211
|
}
|
203
212
|
|
213
|
+
template<typename A>
|
214
|
+
void update_theta_sketch_alloc<A>::reset() {
|
215
|
+
table_.reset();
|
216
|
+
}
|
217
|
+
|
204
218
|
template<typename A>
|
205
219
|
auto update_theta_sketch_alloc<A>::begin() -> iterator {
|
206
220
|
return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
|
@@ -227,7 +241,7 @@ compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered
|
|
227
241
|
}
|
228
242
|
|
229
243
|
template<typename A>
|
230
|
-
void update_theta_sketch_alloc<A>::print_specifics(
|
244
|
+
void update_theta_sketch_alloc<A>::print_specifics(std::ostringstream& os) const {
|
231
245
|
os << " lg nominal size : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
|
232
246
|
os << " lg current size : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
|
233
247
|
os << " resize factor : " << (1 << table_.rf_) << std::endl;
|
@@ -240,29 +254,32 @@ update_theta_sketch_alloc<A>::builder::builder(const A& allocator): theta_base_b
|
|
240
254
|
|
241
255
|
template<typename A>
|
242
256
|
update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
|
243
|
-
return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
|
257
|
+
return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_);
|
244
258
|
}
|
245
259
|
|
246
260
|
// compact sketch
|
247
261
|
|
248
262
|
template<typename A>
|
249
|
-
|
263
|
+
template<typename Other>
|
264
|
+
compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const Other& other, bool ordered):
|
250
265
|
is_empty_(other.is_empty()),
|
251
266
|
is_ordered_(other.is_ordered() || ordered),
|
252
267
|
seed_hash_(other.get_seed_hash()),
|
253
268
|
theta_(other.get_theta64()),
|
254
269
|
entries_(other.get_allocator())
|
255
270
|
{
|
256
|
-
|
257
|
-
|
258
|
-
|
271
|
+
if (!other.is_empty()) {
|
272
|
+
entries_.reserve(other.get_num_retained());
|
273
|
+
std::copy(other.begin(), other.end(), std::back_inserter(entries_));
|
274
|
+
if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
|
275
|
+
}
|
259
276
|
}
|
260
277
|
|
261
278
|
template<typename A>
|
262
279
|
compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
|
263
280
|
std::vector<uint64_t, A>&& entries):
|
264
281
|
is_empty_(is_empty),
|
265
|
-
is_ordered_(is_ordered),
|
282
|
+
is_ordered_(is_ordered || (entries.size() <= 1ULL)),
|
266
283
|
seed_hash_(seed_hash),
|
267
284
|
theta_(theta),
|
268
285
|
entries_(std::move(entries))
|
@@ -290,7 +307,7 @@ uint64_t compact_theta_sketch_alloc<A>::get_theta64() const {
|
|
290
307
|
|
291
308
|
template<typename A>
|
292
309
|
uint32_t compact_theta_sketch_alloc<A>::get_num_retained() const {
|
293
|
-
return entries_.size();
|
310
|
+
return static_cast<uint32_t>(entries_.size());
|
294
311
|
}
|
295
312
|
|
296
313
|
template<typename A>
|
@@ -300,58 +317,58 @@ uint16_t compact_theta_sketch_alloc<A>::get_seed_hash() const {
|
|
300
317
|
|
301
318
|
template<typename A>
|
302
319
|
auto compact_theta_sketch_alloc<A>::begin() -> iterator {
|
303
|
-
return iterator(entries_.data(), entries_.size(), 0);
|
320
|
+
return iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
304
321
|
}
|
305
322
|
|
306
323
|
template<typename A>
|
307
324
|
auto compact_theta_sketch_alloc<A>::end() -> iterator {
|
308
|
-
return iterator(nullptr, 0, entries_.size());
|
325
|
+
return iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
309
326
|
}
|
310
327
|
|
311
328
|
template<typename A>
|
312
329
|
auto compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
|
313
|
-
return const_iterator(entries_.data(), entries_.size(), 0);
|
330
|
+
return const_iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
314
331
|
}
|
315
332
|
|
316
333
|
template<typename A>
|
317
334
|
auto compact_theta_sketch_alloc<A>::end() const -> const_iterator {
|
318
|
-
return const_iterator(nullptr, 0, entries_.size());
|
335
|
+
return const_iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
319
336
|
}
|
320
337
|
|
321
338
|
template<typename A>
|
322
|
-
void compact_theta_sketch_alloc<A>::print_specifics(
|
339
|
+
void compact_theta_sketch_alloc<A>::print_specifics(std::ostringstream&) const {}
|
323
340
|
|
324
341
|
template<typename A>
|
325
342
|
void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
|
326
343
|
const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
|
327
344
|
const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
|
328
|
-
|
345
|
+
write(os, preamble_longs);
|
329
346
|
const uint8_t serial_version = SERIAL_VERSION;
|
330
|
-
|
347
|
+
write(os, serial_version);
|
331
348
|
const uint8_t type = SKETCH_TYPE;
|
332
|
-
|
349
|
+
write(os, type);
|
333
350
|
const uint16_t unused16 = 0;
|
334
|
-
|
351
|
+
write(os, unused16);
|
335
352
|
const uint8_t flags_byte(
|
336
353
|
(1 << flags::IS_COMPACT) |
|
337
354
|
(1 << flags::IS_READ_ONLY) |
|
338
355
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
339
356
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
340
357
|
);
|
341
|
-
|
358
|
+
write(os, flags_byte);
|
342
359
|
const uint16_t seed_hash = get_seed_hash();
|
343
|
-
|
360
|
+
write(os, seed_hash);
|
344
361
|
if (!this->is_empty()) {
|
345
362
|
if (!is_single_item) {
|
346
|
-
const uint32_t num_entries = entries_.size();
|
347
|
-
|
363
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
364
|
+
write(os, num_entries);
|
348
365
|
const uint32_t unused32 = 0;
|
349
|
-
|
366
|
+
write(os, unused32);
|
350
367
|
if (this->is_estimation_mode()) {
|
351
|
-
|
368
|
+
write(os, this->theta_);
|
352
369
|
}
|
353
370
|
}
|
354
|
-
|
371
|
+
write(os, entries_.data(), entries_.size() * sizeof(uint64_t));
|
355
372
|
}
|
356
373
|
}
|
357
374
|
|
@@ -364,30 +381,28 @@ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const
|
|
364
381
|
vector_bytes bytes(size, 0, entries_.get_allocator());
|
365
382
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
366
383
|
|
367
|
-
ptr += copy_to_mem(
|
384
|
+
ptr += copy_to_mem(preamble_longs, ptr);
|
368
385
|
const uint8_t serial_version = SERIAL_VERSION;
|
369
|
-
ptr += copy_to_mem(
|
386
|
+
ptr += copy_to_mem(serial_version, ptr);
|
370
387
|
const uint8_t type = SKETCH_TYPE;
|
371
|
-
ptr += copy_to_mem(
|
372
|
-
|
373
|
-
ptr += copy_to_mem(&unused16, ptr, sizeof(unused16));
|
388
|
+
ptr += copy_to_mem(type, ptr);
|
389
|
+
ptr += sizeof(uint16_t); // unused
|
374
390
|
const uint8_t flags_byte(
|
375
391
|
(1 << flags::IS_COMPACT) |
|
376
392
|
(1 << flags::IS_READ_ONLY) |
|
377
393
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
378
394
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
379
395
|
);
|
380
|
-
ptr += copy_to_mem(
|
396
|
+
ptr += copy_to_mem(flags_byte, ptr);
|
381
397
|
const uint16_t seed_hash = get_seed_hash();
|
382
|
-
ptr += copy_to_mem(
|
398
|
+
ptr += copy_to_mem(seed_hash, ptr);
|
383
399
|
if (!this->is_empty()) {
|
384
400
|
if (!is_single_item) {
|
385
|
-
const uint32_t num_entries = entries_.size();
|
386
|
-
ptr += copy_to_mem(
|
387
|
-
|
388
|
-
ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
|
401
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
402
|
+
ptr += copy_to_mem(num_entries, ptr);
|
403
|
+
ptr += sizeof(uint32_t);
|
389
404
|
if (this->is_estimation_mode()) {
|
390
|
-
ptr += copy_to_mem(
|
405
|
+
ptr += copy_to_mem(theta_, ptr);
|
391
406
|
}
|
392
407
|
}
|
393
408
|
ptr += copy_to_mem(entries_.data(), ptr, entries_.size() * sizeof(uint64_t));
|
@@ -397,43 +412,104 @@ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const
|
|
397
412
|
|
398
413
|
template<typename A>
|
399
414
|
compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
|
400
|
-
|
401
|
-
|
402
|
-
uint8_t
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
415
|
+
const auto preamble_longs = read<uint8_t>(is);
|
416
|
+
const auto serial_version = read<uint8_t>(is);
|
417
|
+
const auto type = read<uint8_t>(is);
|
418
|
+
switch (serial_version) {
|
419
|
+
case SERIAL_VERSION: {
|
420
|
+
read<uint16_t>(is); // unused
|
421
|
+
const auto flags_byte = read<uint8_t>(is);
|
422
|
+
const auto seed_hash = read<uint16_t>(is);
|
423
|
+
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
424
|
+
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
425
|
+
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
426
|
+
if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
427
|
+
|
428
|
+
uint64_t theta = theta_constants::MAX_THETA;
|
429
|
+
uint32_t num_entries = 0;
|
430
|
+
if (!is_empty) {
|
431
|
+
if (preamble_longs == 1) {
|
432
|
+
num_entries = 1;
|
433
|
+
} else {
|
434
|
+
num_entries = read<uint32_t>(is);
|
435
|
+
read<uint32_t>(is); // unused
|
436
|
+
if (preamble_longs > 2) {
|
437
|
+
theta = read<uint64_t>(is);
|
438
|
+
}
|
439
|
+
}
|
440
|
+
}
|
441
|
+
std::vector<uint64_t, A> entries(num_entries, 0, allocator);
|
442
|
+
if (!is_empty) read(is, entries.data(), sizeof(uint64_t) * entries.size());
|
416
443
|
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
444
|
+
const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
|
445
|
+
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
446
|
+
return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
|
447
|
+
}
|
448
|
+
case 1: {
|
449
|
+
const auto seed_hash = compute_seed_hash(seed);
|
450
|
+
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
451
|
+
read<uint8_t>(is); // unused
|
452
|
+
read<uint32_t>(is); // unused
|
453
|
+
const auto num_entries = read<uint32_t>(is);
|
454
|
+
read<uint32_t>(is); //unused
|
455
|
+
const auto theta = read<uint64_t>(is);
|
456
|
+
std::vector<uint64_t> entries(num_entries, 0, allocator);
|
457
|
+
bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
|
458
|
+
if (!is_empty)
|
459
|
+
read(is, entries.data(), sizeof(uint64_t) * entries.size());
|
460
|
+
if (!is.good())
|
461
|
+
throw std::runtime_error("error reading from std::istream");
|
462
|
+
return compact_theta_sketch_alloc(is_empty, true, seed_hash, theta, std::move(entries));
|
463
|
+
}
|
464
|
+
case 2: {
|
465
|
+
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
466
|
+
read<uint8_t>(is); // unused
|
467
|
+
read<uint16_t>(is); // unused
|
468
|
+
const uint16_t seed_hash = read<uint16_t>(is);
|
469
|
+
checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
470
|
+
if (preamble_longs == 1) {
|
471
|
+
if (!is.good())
|
472
|
+
throw std::runtime_error("error reading from std::istream");
|
473
|
+
std::vector<uint64_t> entries(0, 0, allocator);
|
474
|
+
return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
|
475
|
+
} else if (preamble_longs == 2) {
|
476
|
+
const uint32_t num_entries = read<uint32_t>(is);
|
477
|
+
read<uint32_t>(is); // unused
|
478
|
+
std::vector<uint64_t> entries(num_entries, 0, allocator);
|
479
|
+
if (num_entries == 0) {
|
480
|
+
return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
|
481
|
+
}
|
482
|
+
read(is, entries.data(), entries.size() * sizeof(uint64_t));
|
483
|
+
if (!is.good())
|
484
|
+
throw std::runtime_error("error reading from std::istream");
|
485
|
+
return compact_theta_sketch_alloc(false, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
|
486
|
+
} else if (preamble_longs == 3) {
|
487
|
+
const uint32_t num_entries = read<uint32_t>(is);
|
488
|
+
read<uint32_t>(is); // unused
|
489
|
+
const auto theta = read<uint64_t>(is);
|
490
|
+
bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
|
491
|
+
std::vector<uint64_t> entries(num_entries, 0, allocator);
|
492
|
+
if (is_empty) {
|
493
|
+
if (!is.good())
|
494
|
+
throw std::runtime_error("error reading from std::istream");
|
495
|
+
return compact_theta_sketch_alloc(true, true, seed_hash, theta, std::move(entries));
|
496
|
+
} else {
|
497
|
+
read(is, entries.data(), sizeof(uint64_t) * entries.size());
|
498
|
+
if (!is.good())
|
499
|
+
throw std::runtime_error("error reading from std::istream");
|
500
|
+
return compact_theta_sketch_alloc(false, true, seed_hash, theta, std::move(entries));
|
501
|
+
}
|
502
|
+
} else {
|
503
|
+
throw std::invalid_argument(std::to_string(preamble_longs) + " longs of premable, but expected 1, 2, or 3");
|
428
504
|
}
|
429
|
-
}
|
430
505
|
}
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
506
|
+
default:
|
507
|
+
// this should always fail since the valid cases are handled above
|
508
|
+
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
509
|
+
// this throw is never reached, because check_serial_version will throw an informative exception.
|
510
|
+
// This is only here to avoid a compiler warning about a path without a return value.
|
511
|
+
throw std::invalid_argument("unexpected sketch serialization version");
|
512
|
+
}
|
437
513
|
}
|
438
514
|
|
439
515
|
template<typename A>
|
@@ -442,17 +518,16 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v
|
|
442
518
|
const char* ptr = static_cast<const char*>(bytes);
|
443
519
|
const char* base = ptr;
|
444
520
|
uint8_t preamble_longs;
|
445
|
-
ptr += copy_from_mem(ptr,
|
521
|
+
ptr += copy_from_mem(ptr, preamble_longs);
|
446
522
|
uint8_t serial_version;
|
447
|
-
ptr += copy_from_mem(ptr,
|
523
|
+
ptr += copy_from_mem(ptr, serial_version);
|
448
524
|
uint8_t type;
|
449
|
-
ptr += copy_from_mem(ptr,
|
450
|
-
uint16_t
|
451
|
-
ptr += copy_from_mem(ptr, &unused16, sizeof(unused16));
|
525
|
+
ptr += copy_from_mem(ptr, type);
|
526
|
+
ptr += sizeof(uint16_t); // unused
|
452
527
|
uint8_t flags_byte;
|
453
|
-
ptr += copy_from_mem(ptr,
|
528
|
+
ptr += copy_from_mem(ptr, flags_byte);
|
454
529
|
uint16_t seed_hash;
|
455
|
-
ptr += copy_from_mem(ptr,
|
530
|
+
ptr += copy_from_mem(ptr, seed_hash);
|
456
531
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
457
532
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
458
533
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
@@ -465,12 +540,11 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v
|
|
465
540
|
num_entries = 1;
|
466
541
|
} else {
|
467
542
|
ensure_minimum_memory(size, 8); // read the first prelong before this method
|
468
|
-
ptr += copy_from_mem(ptr,
|
469
|
-
uint32_t
|
470
|
-
ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
|
543
|
+
ptr += copy_from_mem(ptr, num_entries);
|
544
|
+
ptr += sizeof(uint32_t); // unused
|
471
545
|
if (preamble_longs > 2) {
|
472
546
|
ensure_minimum_memory(size, (preamble_longs - 1) << 3);
|
473
|
-
ptr += copy_from_mem(ptr,
|
547
|
+
ptr += copy_from_mem(ptr, theta);
|
474
548
|
}
|
475
549
|
}
|
476
550
|
}
|
@@ -483,7 +557,77 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v
|
|
483
557
|
return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
|
484
558
|
}
|
485
559
|
|
560
|
+
// wrapped compact sketch
|
561
|
+
|
562
|
+
template<typename A>
|
563
|
+
wrapped_compact_theta_sketch_alloc<A>::wrapped_compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint32_t num_entries,
|
564
|
+
uint64_t theta, const uint64_t* entries):
|
565
|
+
is_empty_(is_empty),
|
566
|
+
is_ordered_(is_ordered),
|
567
|
+
seed_hash_(seed_hash),
|
568
|
+
num_entries_(num_entries),
|
569
|
+
theta_(theta),
|
570
|
+
entries_(entries)
|
571
|
+
{}
|
572
|
+
|
573
|
+
template<typename A>
|
574
|
+
const wrapped_compact_theta_sketch_alloc<A> wrapped_compact_theta_sketch_alloc<A>::wrap(const void* bytes, size_t size, uint64_t seed, bool dump_on_error) {
|
575
|
+
auto data = compact_theta_sketch_parser<true>::parse(bytes, size, seed, dump_on_error);
|
576
|
+
return wrapped_compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.num_entries, data.theta, data.entries);
|
577
|
+
}
|
578
|
+
|
579
|
+
template<typename A>
|
580
|
+
A wrapped_compact_theta_sketch_alloc<A>::get_allocator() const {
|
581
|
+
return A();
|
582
|
+
}
|
583
|
+
|
584
|
+
template<typename A>
|
585
|
+
bool wrapped_compact_theta_sketch_alloc<A>::is_empty() const {
|
586
|
+
return is_empty_;
|
587
|
+
}
|
588
|
+
|
589
|
+
template<typename A>
|
590
|
+
bool wrapped_compact_theta_sketch_alloc<A>::is_ordered() const {
|
591
|
+
return is_ordered_;
|
592
|
+
}
|
593
|
+
|
594
|
+
template<typename A>
|
595
|
+
uint64_t wrapped_compact_theta_sketch_alloc<A>::get_theta64() const {
|
596
|
+
return theta_;
|
597
|
+
}
|
598
|
+
|
599
|
+
template<typename A>
|
600
|
+
uint32_t wrapped_compact_theta_sketch_alloc<A>::get_num_retained() const {
|
601
|
+
return static_cast<uint32_t>(num_entries_);
|
602
|
+
}
|
603
|
+
|
604
|
+
template<typename A>
|
605
|
+
uint16_t wrapped_compact_theta_sketch_alloc<A>::get_seed_hash() const {
|
606
|
+
return seed_hash_;
|
607
|
+
}
|
608
|
+
|
609
|
+
template<typename A>
|
610
|
+
auto wrapped_compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
|
611
|
+
return entries_;
|
612
|
+
}
|
613
|
+
|
614
|
+
template<typename A>
|
615
|
+
auto wrapped_compact_theta_sketch_alloc<A>::end() const -> const_iterator {
|
616
|
+
return entries_ + num_entries_;
|
617
|
+
}
|
618
|
+
|
619
|
+
template<typename A>
|
620
|
+
void wrapped_compact_theta_sketch_alloc<A>::print_specifics(std::ostringstream&) const {}
|
621
|
+
|
622
|
+
template<typename A>
|
623
|
+
void wrapped_compact_theta_sketch_alloc<A>::print_items(std::ostringstream& os) const {
|
624
|
+
os << "### Retained entries" << std::endl;
|
625
|
+
for (const auto& hash: *this) {
|
626
|
+
os << hash << std::endl;
|
627
|
+
}
|
628
|
+
os << "### End retained entries" << std::endl;
|
629
|
+
}
|
630
|
+
|
486
631
|
} /* namespace datasketches */
|
487
632
|
|
488
633
|
#endif
|
489
|
-
|
@@ -35,13 +35,13 @@ public:
|
|
35
35
|
using CompactSketch = compact_theta_sketch_alloc<Allocator>;
|
36
36
|
using resize_factor = theta_constants::resize_factor;
|
37
37
|
|
38
|
-
struct
|
39
|
-
|
38
|
+
struct nop_policy {
|
39
|
+
void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
|
40
|
+
unused(internal_entry);
|
40
41
|
unused(incoming_entry);
|
41
|
-
return internal_entry;
|
42
42
|
}
|
43
43
|
};
|
44
|
-
using State = theta_union_base<Entry, ExtractKey,
|
44
|
+
using State = theta_union_base<Entry, ExtractKey, nop_policy, Sketch, CompactSketch, Allocator>;
|
45
45
|
|
46
46
|
// No constructor here. Use builder instead.
|
47
47
|
class builder;
|
@@ -60,11 +60,16 @@ public:
|
|
60
60
|
*/
|
61
61
|
CompactSketch get_result(bool ordered = true) const;
|
62
62
|
|
63
|
+
/**
|
64
|
+
* Reset the union to the initial empty state
|
65
|
+
*/
|
66
|
+
void reset();
|
67
|
+
|
63
68
|
private:
|
64
69
|
State state_;
|
65
70
|
|
66
71
|
// for builder
|
67
|
-
theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Allocator& allocator);
|
72
|
+
theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Allocator& allocator);
|
68
73
|
};
|
69
74
|
|
70
75
|
template<typename A>
|
@@ -38,7 +38,7 @@ public:
|
|
38
38
|
using resize_factor = typename hash_table::resize_factor;
|
39
39
|
using comparator = compare_by_key<ExtractKey>;
|
40
40
|
|
41
|
-
theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
41
|
+
theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
42
42
|
|
43
43
|
template<typename FwdSketch>
|
44
44
|
void update(FwdSketch&& sketch);
|
@@ -47,6 +47,8 @@ public:
|
|
47
47
|
|
48
48
|
const Policy& get_policy() const;
|
49
49
|
|
50
|
+
void reset();
|
51
|
+
|
50
52
|
private:
|
51
53
|
Policy policy_;
|
52
54
|
hash_table table_;
|
@@ -28,9 +28,9 @@ namespace datasketches {
|
|
28
28
|
|
29
29
|
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
|
30
30
|
theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
|
31
|
-
uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
31
|
+
float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
32
32
|
policy_(policy),
|
33
|
-
table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator),
|
33
|
+
table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator),
|
34
34
|
union_theta_(table_.theta_)
|
35
35
|
{}
|
36
36
|
|
@@ -43,7 +43,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
|
|
43
43
|
if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64();
|
44
44
|
for (auto& entry: sketch) {
|
45
45
|
const uint64_t hash = EK()(entry);
|
46
|
-
if (hash < union_theta_) {
|
46
|
+
if (hash < union_theta_ && hash < table_.theta_) {
|
47
47
|
auto result = table_.find(hash);
|
48
48
|
if (!result.second) {
|
49
49
|
table_.insert(result.first, conditional_forward<SS>(entry));
|
@@ -84,6 +84,12 @@ const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy() const {
|
|
84
84
|
return policy_;
|
85
85
|
}
|
86
86
|
|
87
|
+
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
|
88
|
+
void theta_union_base<EN, EK, P, S, CS, A>::reset() {
|
89
|
+
table_.reset();
|
90
|
+
union_theta_ = table_.theta_;
|
91
|
+
}
|
92
|
+
|
87
93
|
} /* namespace datasketches */
|
88
94
|
|
89
95
|
#endif
|