datasketches 0.2.2 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +8 -8
- data/ext/datasketches/kll_wrapper.cpp +5 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
- data/vendor/datasketches-cpp/python/README.md +57 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
- data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
- data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
- metadata +34 -12
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
|
|
23
23
|
#include <sstream>
|
|
24
24
|
#include <vector>
|
|
25
|
+
#include <stdexcept>
|
|
25
26
|
|
|
26
27
|
#include "serde.hpp"
|
|
27
28
|
#include "binomial_bounds.hpp"
|
|
@@ -31,64 +32,72 @@
|
|
|
31
32
|
namespace datasketches {
|
|
32
33
|
|
|
33
34
|
template<typename A>
|
|
34
|
-
bool
|
|
35
|
+
bool base_theta_sketch_alloc<A>::is_estimation_mode() const {
|
|
35
36
|
return get_theta64() < theta_constants::MAX_THETA && !is_empty();
|
|
36
37
|
}
|
|
37
38
|
|
|
38
39
|
template<typename A>
|
|
39
|
-
double
|
|
40
|
+
double base_theta_sketch_alloc<A>::get_theta() const {
|
|
40
41
|
return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
|
|
41
42
|
}
|
|
42
43
|
|
|
43
44
|
template<typename A>
|
|
44
|
-
double
|
|
45
|
+
double base_theta_sketch_alloc<A>::get_estimate() const {
|
|
45
46
|
return get_num_retained() / get_theta();
|
|
46
47
|
}
|
|
47
48
|
|
|
48
49
|
template<typename A>
|
|
49
|
-
double
|
|
50
|
+
double base_theta_sketch_alloc<A>::get_lower_bound(uint8_t num_std_devs) const {
|
|
50
51
|
if (!is_estimation_mode()) return get_num_retained();
|
|
51
52
|
return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
|
|
52
53
|
}
|
|
53
54
|
|
|
54
55
|
template<typename A>
|
|
55
|
-
double
|
|
56
|
+
double base_theta_sketch_alloc<A>::get_upper_bound(uint8_t num_std_devs) const {
|
|
56
57
|
if (!is_estimation_mode()) return get_num_retained();
|
|
57
58
|
return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
|
|
58
59
|
}
|
|
59
60
|
|
|
60
61
|
template<typename A>
|
|
61
|
-
string<A>
|
|
62
|
-
|
|
62
|
+
string<A> base_theta_sketch_alloc<A>::to_string(bool print_details) const {
|
|
63
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
64
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
65
|
+
std::ostringstream os;
|
|
63
66
|
os << "### Theta sketch summary:" << std::endl;
|
|
64
|
-
os << " num retained entries : " << get_num_retained() << std::endl;
|
|
65
|
-
os << " seed hash : " << get_seed_hash() << std::endl;
|
|
66
|
-
os << " empty? : " << (is_empty() ? "true" : "false") << std::endl;
|
|
67
|
-
os << " ordered? : " << (is_ordered() ? "true" : "false") << std::endl;
|
|
68
|
-
os << " estimation mode? : " << (is_estimation_mode() ? "true" : "false") << std::endl;
|
|
69
|
-
os << " theta (fraction) : " << get_theta() << std::endl;
|
|
70
|
-
os << " theta (raw 64-bit) : " << get_theta64() << std::endl;
|
|
67
|
+
os << " num retained entries : " << this->get_num_retained() << std::endl;
|
|
68
|
+
os << " seed hash : " << this->get_seed_hash() << std::endl;
|
|
69
|
+
os << " empty? : " << (this->is_empty() ? "true" : "false") << std::endl;
|
|
70
|
+
os << " ordered? : " << (this->is_ordered() ? "true" : "false") << std::endl;
|
|
71
|
+
os << " estimation mode? : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
|
|
72
|
+
os << " theta (fraction) : " << this->get_theta() << std::endl;
|
|
73
|
+
os << " theta (raw 64-bit) : " << this->get_theta64() << std::endl;
|
|
71
74
|
os << " estimate : " << this->get_estimate() << std::endl;
|
|
72
75
|
os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
|
|
73
76
|
os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
|
|
74
77
|
print_specifics(os);
|
|
75
78
|
os << "### End sketch summary" << std::endl;
|
|
76
|
-
if (
|
|
79
|
+
if (print_details) {
|
|
80
|
+
print_items(os);
|
|
81
|
+
}
|
|
82
|
+
return string<A>(os.str().c_str(), this->get_allocator());
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
template<typename A>
|
|
86
|
+
void theta_sketch_alloc<A>::print_items(std::ostringstream& os) const {
|
|
77
87
|
os << "### Retained entries" << std::endl;
|
|
78
88
|
for (const auto& hash: *this) {
|
|
79
89
|
os << hash << std::endl;
|
|
80
90
|
}
|
|
81
91
|
os << "### End retained entries" << std::endl;
|
|
82
|
-
}
|
|
83
|
-
return os.str();
|
|
84
92
|
}
|
|
85
93
|
|
|
94
|
+
|
|
86
95
|
// update sketch
|
|
87
96
|
|
|
88
97
|
template<typename A>
|
|
89
98
|
update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
|
|
90
|
-
uint64_t theta, uint64_t seed, const A& allocator):
|
|
91
|
-
table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
|
|
99
|
+
float p, uint64_t theta, uint64_t seed, const A& allocator):
|
|
100
|
+
table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
|
|
92
101
|
{}
|
|
93
102
|
|
|
94
103
|
template<typename A>
|
|
@@ -103,12 +112,12 @@ bool update_theta_sketch_alloc<A>::is_empty() const {
|
|
|
103
112
|
|
|
104
113
|
template<typename A>
|
|
105
114
|
bool update_theta_sketch_alloc<A>::is_ordered() const {
|
|
106
|
-
return false;
|
|
115
|
+
return table_.num_entries_ > 1 ? false : true;
|
|
107
116
|
}
|
|
108
117
|
|
|
109
118
|
template<typename A>
|
|
110
119
|
uint64_t update_theta_sketch_alloc<A>::get_theta64() const {
|
|
111
|
-
return table_.theta_;
|
|
120
|
+
return is_empty() ? theta_constants::MAX_THETA : table_.theta_;
|
|
112
121
|
}
|
|
113
122
|
|
|
114
123
|
template<typename A>
|
|
@@ -202,6 +211,11 @@ void update_theta_sketch_alloc<A>::trim() {
|
|
|
202
211
|
table_.trim();
|
|
203
212
|
}
|
|
204
213
|
|
|
214
|
+
template<typename A>
|
|
215
|
+
void update_theta_sketch_alloc<A>::reset() {
|
|
216
|
+
table_.reset();
|
|
217
|
+
}
|
|
218
|
+
|
|
205
219
|
template<typename A>
|
|
206
220
|
auto update_theta_sketch_alloc<A>::begin() -> iterator {
|
|
207
221
|
return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
|
|
@@ -228,7 +242,7 @@ compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered
|
|
|
228
242
|
}
|
|
229
243
|
|
|
230
244
|
template<typename A>
|
|
231
|
-
void update_theta_sketch_alloc<A>::print_specifics(
|
|
245
|
+
void update_theta_sketch_alloc<A>::print_specifics(std::ostringstream& os) const {
|
|
232
246
|
os << " lg nominal size : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
|
|
233
247
|
os << " lg current size : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
|
|
234
248
|
os << " resize factor : " << (1 << table_.rf_) << std::endl;
|
|
@@ -241,7 +255,7 @@ update_theta_sketch_alloc<A>::builder::builder(const A& allocator): theta_base_b
|
|
|
241
255
|
|
|
242
256
|
template<typename A>
|
|
243
257
|
update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
|
|
244
|
-
return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
|
|
258
|
+
return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_);
|
|
245
259
|
}
|
|
246
260
|
|
|
247
261
|
// compact sketch
|
|
@@ -255,16 +269,18 @@ seed_hash_(other.get_seed_hash()),
|
|
|
255
269
|
theta_(other.get_theta64()),
|
|
256
270
|
entries_(other.get_allocator())
|
|
257
271
|
{
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
272
|
+
if (!other.is_empty()) {
|
|
273
|
+
entries_.reserve(other.get_num_retained());
|
|
274
|
+
std::copy(other.begin(), other.end(), std::back_inserter(entries_));
|
|
275
|
+
if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
|
|
276
|
+
}
|
|
261
277
|
}
|
|
262
278
|
|
|
263
279
|
template<typename A>
|
|
264
280
|
compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
|
|
265
281
|
std::vector<uint64_t, A>&& entries):
|
|
266
282
|
is_empty_(is_empty),
|
|
267
|
-
is_ordered_(is_ordered),
|
|
283
|
+
is_ordered_(is_ordered || (entries.size() <= 1ULL)),
|
|
268
284
|
seed_hash_(seed_hash),
|
|
269
285
|
theta_(theta),
|
|
270
286
|
entries_(std::move(entries))
|
|
@@ -321,7 +337,7 @@ auto compact_theta_sketch_alloc<A>::end() const -> const_iterator {
|
|
|
321
337
|
}
|
|
322
338
|
|
|
323
339
|
template<typename A>
|
|
324
|
-
void compact_theta_sketch_alloc<A>::print_specifics(
|
|
340
|
+
void compact_theta_sketch_alloc<A>::print_specifics(std::ostringstream&) const {}
|
|
325
341
|
|
|
326
342
|
template<typename A>
|
|
327
343
|
void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
|
|
@@ -400,78 +416,107 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::is
|
|
|
400
416
|
const auto preamble_longs = read<uint8_t>(is);
|
|
401
417
|
const auto serial_version = read<uint8_t>(is);
|
|
402
418
|
const auto type = read<uint8_t>(is);
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
419
|
+
switch (serial_version) {
|
|
420
|
+
case SERIAL_VERSION: {
|
|
421
|
+
read<uint16_t>(is); // unused
|
|
422
|
+
const auto flags_byte = read<uint8_t>(is);
|
|
423
|
+
const auto seed_hash = read<uint16_t>(is);
|
|
424
|
+
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
|
425
|
+
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
|
426
|
+
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
427
|
+
if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
|
428
|
+
|
|
429
|
+
uint64_t theta = theta_constants::MAX_THETA;
|
|
430
|
+
uint32_t num_entries = 0;
|
|
431
|
+
if (!is_empty) {
|
|
432
|
+
if (preamble_longs == 1) {
|
|
433
|
+
num_entries = 1;
|
|
434
|
+
} else {
|
|
435
|
+
num_entries = read<uint32_t>(is);
|
|
436
|
+
read<uint32_t>(is); // unused
|
|
437
|
+
if (preamble_longs > 2) {
|
|
438
|
+
theta = read<uint64_t>(is);
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
}
|
|
442
|
+
std::vector<uint64_t, A> entries(num_entries, 0, allocator);
|
|
443
|
+
if (!is_empty) read(is, entries.data(), sizeof(uint64_t) * entries.size());
|
|
444
|
+
|
|
445
|
+
const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
|
|
446
|
+
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
447
|
+
return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
|
|
448
|
+
}
|
|
449
|
+
case 1: {
|
|
450
|
+
const auto seed_hash = compute_seed_hash(seed);
|
|
451
|
+
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
|
452
|
+
read<uint8_t>(is); // unused
|
|
418
453
|
read<uint32_t>(is); // unused
|
|
419
|
-
|
|
420
|
-
|
|
454
|
+
const auto num_entries = read<uint32_t>(is);
|
|
455
|
+
read<uint32_t>(is); //unused
|
|
456
|
+
const auto theta = read<uint64_t>(is);
|
|
457
|
+
std::vector<uint64_t, A> entries(num_entries, 0, allocator);
|
|
458
|
+
bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
|
|
459
|
+
if (!is_empty)
|
|
460
|
+
read(is, entries.data(), sizeof(uint64_t) * entries.size());
|
|
461
|
+
if (!is.good())
|
|
462
|
+
throw std::runtime_error("error reading from std::istream");
|
|
463
|
+
return compact_theta_sketch_alloc(is_empty, true, seed_hash, theta, std::move(entries));
|
|
464
|
+
}
|
|
465
|
+
case 2: {
|
|
466
|
+
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
|
467
|
+
read<uint8_t>(is); // unused
|
|
468
|
+
read<uint16_t>(is); // unused
|
|
469
|
+
const uint16_t seed_hash = read<uint16_t>(is);
|
|
470
|
+
checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
|
471
|
+
if (preamble_longs == 1) {
|
|
472
|
+
if (!is.good())
|
|
473
|
+
throw std::runtime_error("error reading from std::istream");
|
|
474
|
+
std::vector<uint64_t, A> entries(0, 0, allocator);
|
|
475
|
+
return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
|
|
476
|
+
} else if (preamble_longs == 2) {
|
|
477
|
+
const uint32_t num_entries = read<uint32_t>(is);
|
|
478
|
+
read<uint32_t>(is); // unused
|
|
479
|
+
std::vector<uint64_t, A> entries(num_entries, 0, allocator);
|
|
480
|
+
if (num_entries == 0) {
|
|
481
|
+
return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
|
|
482
|
+
}
|
|
483
|
+
read(is, entries.data(), entries.size() * sizeof(uint64_t));
|
|
484
|
+
if (!is.good())
|
|
485
|
+
throw std::runtime_error("error reading from std::istream");
|
|
486
|
+
return compact_theta_sketch_alloc(false, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
|
|
487
|
+
} else if (preamble_longs == 3) {
|
|
488
|
+
const uint32_t num_entries = read<uint32_t>(is);
|
|
489
|
+
read<uint32_t>(is); // unused
|
|
490
|
+
const auto theta = read<uint64_t>(is);
|
|
491
|
+
bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
|
|
492
|
+
std::vector<uint64_t, A> entries(num_entries, 0, allocator);
|
|
493
|
+
if (is_empty) {
|
|
494
|
+
if (!is.good())
|
|
495
|
+
throw std::runtime_error("error reading from std::istream");
|
|
496
|
+
return compact_theta_sketch_alloc(true, true, seed_hash, theta, std::move(entries));
|
|
497
|
+
} else {
|
|
498
|
+
read(is, entries.data(), sizeof(uint64_t) * entries.size());
|
|
499
|
+
if (!is.good())
|
|
500
|
+
throw std::runtime_error("error reading from std::istream");
|
|
501
|
+
return compact_theta_sketch_alloc(false, true, seed_hash, theta, std::move(entries));
|
|
502
|
+
}
|
|
503
|
+
} else {
|
|
504
|
+
throw std::invalid_argument(std::to_string(preamble_longs) + " longs of premable, but expected 1, 2, or 3");
|
|
421
505
|
}
|
|
422
|
-
}
|
|
423
506
|
}
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
507
|
+
default:
|
|
508
|
+
// this should always fail since the valid cases are handled above
|
|
509
|
+
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
|
510
|
+
// this throw is never reached, because check_serial_version will throw an informative exception.
|
|
511
|
+
// This is only here to avoid a compiler warning about a path without a return value.
|
|
512
|
+
throw std::invalid_argument("unexpected sketch serialization version");
|
|
513
|
+
}
|
|
430
514
|
}
|
|
431
515
|
|
|
432
516
|
template<typename A>
|
|
433
517
|
compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
const char* base = ptr;
|
|
437
|
-
uint8_t preamble_longs;
|
|
438
|
-
ptr += copy_from_mem(ptr, preamble_longs);
|
|
439
|
-
uint8_t serial_version;
|
|
440
|
-
ptr += copy_from_mem(ptr, serial_version);
|
|
441
|
-
uint8_t type;
|
|
442
|
-
ptr += copy_from_mem(ptr, type);
|
|
443
|
-
ptr += sizeof(uint16_t); // unused
|
|
444
|
-
uint8_t flags_byte;
|
|
445
|
-
ptr += copy_from_mem(ptr, flags_byte);
|
|
446
|
-
uint16_t seed_hash;
|
|
447
|
-
ptr += copy_from_mem(ptr, seed_hash);
|
|
448
|
-
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
|
449
|
-
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
|
450
|
-
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
451
|
-
if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
|
452
|
-
|
|
453
|
-
uint64_t theta = theta_constants::MAX_THETA;
|
|
454
|
-
uint32_t num_entries = 0;
|
|
455
|
-
if (!is_empty) {
|
|
456
|
-
if (preamble_longs == 1) {
|
|
457
|
-
num_entries = 1;
|
|
458
|
-
} else {
|
|
459
|
-
ensure_minimum_memory(size, 8); // read the first prelong before this method
|
|
460
|
-
ptr += copy_from_mem(ptr, num_entries);
|
|
461
|
-
ptr += sizeof(uint32_t); // unused
|
|
462
|
-
if (preamble_longs > 2) {
|
|
463
|
-
ensure_minimum_memory(size, (preamble_longs - 1) << 3);
|
|
464
|
-
ptr += copy_from_mem(ptr, theta);
|
|
465
|
-
}
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
const size_t entries_size_bytes = sizeof(uint64_t) * num_entries;
|
|
469
|
-
check_memory_size(ptr - base + entries_size_bytes, size);
|
|
470
|
-
std::vector<uint64_t, A> entries(num_entries, 0, allocator);
|
|
471
|
-
if (!is_empty) ptr += copy_from_mem(ptr, entries.data(), entries_size_bytes);
|
|
472
|
-
|
|
473
|
-
const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
|
|
474
|
-
return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
|
|
518
|
+
auto data = compact_theta_sketch_parser<true>::parse(bytes, size, seed, false);
|
|
519
|
+
return compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.theta, std::vector<uint64_t, A>(data.entries, data.entries + data.num_entries, allocator));
|
|
475
520
|
}
|
|
476
521
|
|
|
477
522
|
// wrapped compact sketch
|
|
@@ -533,6 +578,18 @@ auto wrapped_compact_theta_sketch_alloc<A>::end() const -> const_iterator {
|
|
|
533
578
|
return entries_ + num_entries_;
|
|
534
579
|
}
|
|
535
580
|
|
|
581
|
+
template<typename A>
|
|
582
|
+
void wrapped_compact_theta_sketch_alloc<A>::print_specifics(std::ostringstream&) const {}
|
|
583
|
+
|
|
584
|
+
template<typename A>
|
|
585
|
+
void wrapped_compact_theta_sketch_alloc<A>::print_items(std::ostringstream& os) const {
|
|
586
|
+
os << "### Retained entries" << std::endl;
|
|
587
|
+
for (const auto& hash: *this) {
|
|
588
|
+
os << hash << std::endl;
|
|
589
|
+
}
|
|
590
|
+
os << "### End retained entries" << std::endl;
|
|
591
|
+
}
|
|
592
|
+
|
|
536
593
|
} /* namespace datasketches */
|
|
537
594
|
|
|
538
595
|
#endif
|
|
@@ -60,11 +60,16 @@ public:
|
|
|
60
60
|
*/
|
|
61
61
|
CompactSketch get_result(bool ordered = true) const;
|
|
62
62
|
|
|
63
|
+
/**
|
|
64
|
+
* Reset the union to the initial empty state
|
|
65
|
+
*/
|
|
66
|
+
void reset();
|
|
67
|
+
|
|
63
68
|
private:
|
|
64
69
|
State state_;
|
|
65
70
|
|
|
66
71
|
// for builder
|
|
67
|
-
theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Allocator& allocator);
|
|
72
|
+
theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Allocator& allocator);
|
|
68
73
|
};
|
|
69
74
|
|
|
70
75
|
template<typename A>
|
|
@@ -38,7 +38,7 @@ public:
|
|
|
38
38
|
using resize_factor = typename hash_table::resize_factor;
|
|
39
39
|
using comparator = compare_by_key<ExtractKey>;
|
|
40
40
|
|
|
41
|
-
theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
41
|
+
theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
42
42
|
|
|
43
43
|
template<typename FwdSketch>
|
|
44
44
|
void update(FwdSketch&& sketch);
|
|
@@ -47,6 +47,8 @@ public:
|
|
|
47
47
|
|
|
48
48
|
const Policy& get_policy() const;
|
|
49
49
|
|
|
50
|
+
void reset();
|
|
51
|
+
|
|
50
52
|
private:
|
|
51
53
|
Policy policy_;
|
|
52
54
|
hash_table table_;
|
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
#define THETA_UNION_BASE_IMPL_HPP_
|
|
22
22
|
|
|
23
23
|
#include <algorithm>
|
|
24
|
+
#include <stdexcept>
|
|
24
25
|
|
|
25
26
|
#include "conditional_forward.hpp"
|
|
26
27
|
|
|
@@ -28,9 +29,9 @@ namespace datasketches {
|
|
|
28
29
|
|
|
29
30
|
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
|
|
30
31
|
theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
|
|
31
|
-
uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
|
32
|
+
float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
|
32
33
|
policy_(policy),
|
|
33
|
-
table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator),
|
|
34
|
+
table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator),
|
|
34
35
|
union_theta_(table_.theta_)
|
|
35
36
|
{}
|
|
36
37
|
|
|
@@ -84,6 +85,12 @@ const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy() const {
|
|
|
84
85
|
return policy_;
|
|
85
86
|
}
|
|
86
87
|
|
|
88
|
+
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
|
|
89
|
+
void theta_union_base<EN, EK, P, S, CS, A>::reset() {
|
|
90
|
+
table_.reset();
|
|
91
|
+
union_theta_ = table_.theta_;
|
|
92
|
+
}
|
|
93
|
+
|
|
87
94
|
} /* namespace datasketches */
|
|
88
95
|
|
|
89
96
|
#endif
|
|
@@ -23,8 +23,8 @@
|
|
|
23
23
|
namespace datasketches {
|
|
24
24
|
|
|
25
25
|
template<typename A>
|
|
26
|
-
theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator):
|
|
27
|
-
state_(lg_cur_size, lg_nom_size, rf, theta, seed, nop_policy(), allocator)
|
|
26
|
+
theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const A& allocator):
|
|
27
|
+
state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, nop_policy(), allocator)
|
|
28
28
|
{}
|
|
29
29
|
|
|
30
30
|
template<typename A>
|
|
@@ -38,14 +38,17 @@ auto theta_union_alloc<A>::get_result(bool ordered) const -> CompactSketch {
|
|
|
38
38
|
return state_.get_result(ordered);
|
|
39
39
|
}
|
|
40
40
|
|
|
41
|
+
template<typename A>
|
|
42
|
+
void theta_union_alloc<A>::reset() {
|
|
43
|
+
state_.reset();
|
|
44
|
+
}
|
|
45
|
+
|
|
41
46
|
template<typename A>
|
|
42
47
|
theta_union_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
|
|
43
48
|
|
|
44
49
|
template<typename A>
|
|
45
50
|
auto theta_union_alloc<A>::builder::build() const -> theta_union_alloc {
|
|
46
|
-
return theta_union_alloc(
|
|
47
|
-
this->starting_sub_multiple(this->lg_k_ + 1, this->MIN_LG_K, static_cast<uint8_t>(this->rf_)),
|
|
48
|
-
this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
|
|
51
|
+
return theta_union_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_);
|
|
49
52
|
}
|
|
50
53
|
|
|
51
54
|
} /* namespace datasketches */
|
|
@@ -40,8 +40,8 @@ struct theta_update_sketch_base {
|
|
|
40
40
|
using resize_factor = theta_constants::resize_factor;
|
|
41
41
|
using comparator = compare_by_key<ExtractKey>;
|
|
42
42
|
|
|
43
|
-
theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
|
|
44
|
-
uint64_t seed, const Allocator& allocator, bool is_empty = true);
|
|
43
|
+
theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p,
|
|
44
|
+
uint64_t theta, uint64_t seed, const Allocator& allocator, bool is_empty = true);
|
|
45
45
|
theta_update_sketch_base(const theta_update_sketch_base& other);
|
|
46
46
|
theta_update_sketch_base(theta_update_sketch_base&& other) noexcept;
|
|
47
47
|
~theta_update_sketch_base();
|
|
@@ -75,6 +75,7 @@ struct theta_update_sketch_base {
|
|
|
75
75
|
uint8_t lg_cur_size_;
|
|
76
76
|
uint8_t lg_nom_size_;
|
|
77
77
|
resize_factor rf_;
|
|
78
|
+
float p_;
|
|
78
79
|
uint32_t num_entries_;
|
|
79
80
|
uint64_t theta_;
|
|
80
81
|
uint64_t seed_;
|
|
@@ -83,6 +84,7 @@ struct theta_update_sketch_base {
|
|
|
83
84
|
void resize();
|
|
84
85
|
void rebuild();
|
|
85
86
|
void trim();
|
|
87
|
+
void reset();
|
|
86
88
|
|
|
87
89
|
static inline uint32_t get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size);
|
|
88
90
|
static inline uint32_t get_stride(uint64_t key, uint8_t lg_size);
|
|
@@ -94,11 +96,14 @@ struct theta_update_sketch_base {
|
|
|
94
96
|
template<typename Derived, typename Allocator>
|
|
95
97
|
class theta_base_builder {
|
|
96
98
|
public:
|
|
99
|
+
// TODO: Redundant and deprecated. Will be removed in next major version release.
|
|
97
100
|
using resize_factor = theta_constants::resize_factor;
|
|
98
101
|
static const uint8_t MIN_LG_K = theta_constants::MIN_LG_K;
|
|
99
102
|
static const uint8_t MAX_LG_K = theta_constants::MAX_LG_K;
|
|
100
|
-
|
|
101
|
-
|
|
103
|
+
// TODO: The following defaults are redundant and deprecated. Will be removed in the
|
|
104
|
+
// next major version release
|
|
105
|
+
static const uint8_t DEFAULT_LG_K = theta_constants::DEFAULT_LG_K;
|
|
106
|
+
static const resize_factor DEFAULT_RESIZE_FACTOR = theta_constants::DEFAULT_RESIZE_FACTOR;
|
|
102
107
|
|
|
103
108
|
/**
|
|
104
109
|
* Creates and instance of the builder with default parameters.
|
|
@@ -146,7 +151,6 @@ protected:
|
|
|
146
151
|
|
|
147
152
|
uint64_t starting_theta() const;
|
|
148
153
|
uint8_t starting_lg_size() const;
|
|
149
|
-
static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf);
|
|
150
154
|
};
|
|
151
155
|
|
|
152
156
|
// key extractor
|
|
@@ -23,16 +23,20 @@
|
|
|
23
23
|
#include <iostream>
|
|
24
24
|
#include <sstream>
|
|
25
25
|
#include <algorithm>
|
|
26
|
+
#include <stdexcept>
|
|
27
|
+
|
|
28
|
+
#include "theta_helpers.hpp"
|
|
26
29
|
|
|
27
30
|
namespace datasketches {
|
|
28
31
|
|
|
29
32
|
template<typename EN, typename EK, typename A>
|
|
30
|
-
theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator, bool is_empty):
|
|
33
|
+
theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const A& allocator, bool is_empty):
|
|
31
34
|
allocator_(allocator),
|
|
32
35
|
is_empty_(is_empty),
|
|
33
36
|
lg_cur_size_(lg_cur_size),
|
|
34
37
|
lg_nom_size_(lg_nom_size),
|
|
35
38
|
rf_(rf),
|
|
39
|
+
p_(p),
|
|
36
40
|
num_entries_(0),
|
|
37
41
|
theta_(theta),
|
|
38
42
|
seed_(seed),
|
|
@@ -52,6 +56,7 @@ is_empty_(other.is_empty_),
|
|
|
52
56
|
lg_cur_size_(other.lg_cur_size_),
|
|
53
57
|
lg_nom_size_(other.lg_nom_size_),
|
|
54
58
|
rf_(other.rf_),
|
|
59
|
+
p_(other.p_),
|
|
55
60
|
num_entries_(other.num_entries_),
|
|
56
61
|
theta_(other.theta_),
|
|
57
62
|
seed_(other.seed_),
|
|
@@ -77,6 +82,7 @@ is_empty_(other.is_empty_),
|
|
|
77
82
|
lg_cur_size_(other.lg_cur_size_),
|
|
78
83
|
lg_nom_size_(other.lg_nom_size_),
|
|
79
84
|
rf_(other.rf_),
|
|
85
|
+
p_(other.p_),
|
|
80
86
|
num_entries_(other.num_entries_),
|
|
81
87
|
theta_(other.theta_),
|
|
82
88
|
seed_(other.seed_),
|
|
@@ -105,6 +111,7 @@ theta_update_sketch_base<EN, EK, A>& theta_update_sketch_base<EN, EK, A>::operat
|
|
|
105
111
|
std::swap(lg_cur_size_, copy.lg_cur_size_);
|
|
106
112
|
std::swap(lg_nom_size_, copy.lg_nom_size_);
|
|
107
113
|
std::swap(rf_, copy.rf_);
|
|
114
|
+
std::swap(p_, copy.p_);
|
|
108
115
|
std::swap(num_entries_, copy.num_entries_);
|
|
109
116
|
std::swap(theta_, copy.theta_);
|
|
110
117
|
std::swap(seed_, copy.seed_);
|
|
@@ -119,6 +126,7 @@ theta_update_sketch_base<EN, EK, A>& theta_update_sketch_base<EN, EK, A>::operat
|
|
|
119
126
|
std::swap(lg_cur_size_, other.lg_cur_size_);
|
|
120
127
|
std::swap(lg_nom_size_, other.lg_nom_size_);
|
|
121
128
|
std::swap(rf_, other.rf_);
|
|
129
|
+
std::swap(p_, other.p_);
|
|
122
130
|
std::swap(num_entries_, other.num_entries_);
|
|
123
131
|
std::swap(theta_, other.theta_);
|
|
124
132
|
std::swap(seed_, other.seed_);
|
|
@@ -247,6 +255,29 @@ void theta_update_sketch_base<EN, EK, A>::trim() {
|
|
|
247
255
|
if (num_entries_ > static_cast<uint32_t>(1 << lg_nom_size_)) rebuild();
|
|
248
256
|
}
|
|
249
257
|
|
|
258
|
+
template<typename EN, typename EK, typename A>
|
|
259
|
+
void theta_update_sketch_base<EN, EK, A>::reset() {
|
|
260
|
+
const size_t cur_size = 1ULL << lg_cur_size_;
|
|
261
|
+
for (size_t i = 0; i < cur_size; ++i) {
|
|
262
|
+
if (EK()(entries_[i]) != 0) {
|
|
263
|
+
entries_[i].~EN();
|
|
264
|
+
EK()(entries_[i]) = 0;
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
const uint8_t starting_lg_size = theta_build_helper<true>::starting_sub_multiple(
|
|
268
|
+
lg_nom_size_ + 1, theta_constants::MIN_LG_K, static_cast<uint8_t>(rf_));
|
|
269
|
+
if (starting_lg_size != lg_cur_size_) {
|
|
270
|
+
allocator_.deallocate(entries_, cur_size);
|
|
271
|
+
lg_cur_size_ = starting_lg_size;
|
|
272
|
+
const size_t new_size = 1ULL << starting_lg_size;
|
|
273
|
+
entries_ = allocator_.allocate(new_size);
|
|
274
|
+
for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
|
|
275
|
+
}
|
|
276
|
+
num_entries_ = 0;
|
|
277
|
+
theta_ = theta_build_helper<true>::starting_theta_from_p(p_);
|
|
278
|
+
is_empty_ = true;
|
|
279
|
+
}
|
|
280
|
+
|
|
250
281
|
template<typename EN, typename EK, typename A>
|
|
251
282
|
void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, size_t size, size_t num) {
|
|
252
283
|
// find the first empty slot
|
|
@@ -271,7 +302,11 @@ void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, siz
|
|
|
271
302
|
|
|
272
303
|
template<typename Derived, typename Allocator>
|
|
273
304
|
theta_base_builder<Derived, Allocator>::theta_base_builder(const Allocator& allocator):
|
|
274
|
-
allocator_(allocator),
|
|
305
|
+
allocator_(allocator),
|
|
306
|
+
lg_k_(theta_constants::DEFAULT_LG_K),
|
|
307
|
+
rf_(theta_constants::DEFAULT_RESIZE_FACTOR),
|
|
308
|
+
p_(1),
|
|
309
|
+
seed_(DEFAULT_SEED) {}
|
|
275
310
|
|
|
276
311
|
template<typename Derived, typename Allocator>
|
|
277
312
|
Derived& theta_base_builder<Derived, Allocator>::set_lg_k(uint8_t lg_k) {
|
|
@@ -306,18 +341,12 @@ Derived& theta_base_builder<Derived, Allocator>::set_seed(uint64_t seed) {
|
|
|
306
341
|
|
|
307
342
|
template<typename Derived, typename Allocator>
|
|
308
343
|
uint64_t theta_base_builder<Derived, Allocator>::starting_theta() const {
|
|
309
|
-
|
|
310
|
-
return theta_constants::MAX_THETA;
|
|
344
|
+
return theta_build_helper<true>::starting_theta_from_p(p_);
|
|
311
345
|
}
|
|
312
346
|
|
|
313
347
|
template<typename Derived, typename Allocator>
|
|
314
348
|
uint8_t theta_base_builder<Derived, Allocator>::starting_lg_size() const {
|
|
315
|
-
return starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_));
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
template<typename Derived, typename Allocator>
|
|
319
|
-
uint8_t theta_base_builder<Derived, Allocator>::starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) {
|
|
320
|
-
return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min;
|
|
349
|
+
return theta_build_helper<true>::starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_));
|
|
321
350
|
}
|
|
322
351
|
|
|
323
352
|
// iterator
|
|
Binary file
|