datasketches 0.1.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
- data/vendor/datasketches-cpp/python/README.md +52 -49
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
- data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
- data/vendor/datasketches-cpp/setup.py +11 -6
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
- metadata +51 -36
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -42,8 +42,8 @@ namespace datasketches {
|
|
42
42
|
* author Jon Malkin
|
43
43
|
*/
|
44
44
|
template<typename T, typename S, typename A>
|
45
|
-
var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf) :
|
46
|
-
var_opt_sketch<T,S,A>(k, rf, false) {}
|
45
|
+
var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, const A& allocator) :
|
46
|
+
var_opt_sketch<T,S,A>(k, rf, false, allocator) {}
|
47
47
|
|
48
48
|
template<typename T, typename S, typename A>
|
49
49
|
var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
|
@@ -56,12 +56,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
|
|
56
56
|
rf_(other.rf_),
|
57
57
|
curr_items_alloc_(other.curr_items_alloc_),
|
58
58
|
filled_data_(other.filled_data_),
|
59
|
+
allocator_(other.allocator_),
|
59
60
|
data_(nullptr),
|
60
61
|
weights_(nullptr),
|
61
62
|
num_marks_in_h_(other.num_marks_in_h_),
|
62
63
|
marks_(nullptr)
|
63
64
|
{
|
64
|
-
data_ =
|
65
|
+
data_ = allocator_.allocate(curr_items_alloc_);
|
65
66
|
// skip gap or anything unused at the end
|
66
67
|
for (size_t i = 0; i < h_; ++i)
|
67
68
|
new (&data_[i]) T(other.data_[i]);
|
@@ -71,13 +72,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
|
|
71
72
|
// we skipped the gap
|
72
73
|
filled_data_ = false;
|
73
74
|
|
74
|
-
weights_ = AllocDouble().allocate(curr_items_alloc_);
|
75
|
+
weights_ = AllocDouble(allocator_).allocate(curr_items_alloc_);
|
75
76
|
// doubles so can successfully copy regardless of the internal state
|
76
|
-
std::copy(
|
77
|
-
|
77
|
+
std::copy(other.weights_, other.weights_ + curr_items_alloc_, weights_);
|
78
|
+
|
78
79
|
if (other.marks_ != nullptr) {
|
79
|
-
marks_ = AllocBool().allocate(curr_items_alloc_);
|
80
|
-
std::copy(
|
80
|
+
marks_ = AllocBool(allocator_).allocate(curr_items_alloc_);
|
81
|
+
std::copy(other.marks_, other.marks_ + curr_items_alloc_, marks_);
|
81
82
|
}
|
82
83
|
}
|
83
84
|
|
@@ -92,12 +93,13 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
|
|
92
93
|
rf_(other.rf_),
|
93
94
|
curr_items_alloc_(other.curr_items_alloc_),
|
94
95
|
filled_data_(other.filled_data_),
|
96
|
+
allocator_(other.allocator_),
|
95
97
|
data_(nullptr),
|
96
98
|
weights_(nullptr),
|
97
99
|
num_marks_in_h_(other.num_marks_in_h_),
|
98
100
|
marks_(nullptr)
|
99
101
|
{
|
100
|
-
data_ =
|
102
|
+
data_ = allocator_.allocate(curr_items_alloc_);
|
101
103
|
// skip gap or anything unused at the end
|
102
104
|
for (size_t i = 0; i < h_; ++i)
|
103
105
|
new (&data_[i]) T(other.data_[i]);
|
@@ -107,28 +109,29 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
|
|
107
109
|
// we skipped the gap
|
108
110
|
filled_data_ = false;
|
109
111
|
|
110
|
-
weights_ = AllocDouble().allocate(curr_items_alloc_);
|
112
|
+
weights_ = AllocDouble(allocator_).allocate(curr_items_alloc_);
|
111
113
|
// doubles so can successfully copy regardless of the internal state
|
112
|
-
std::copy(
|
114
|
+
std::copy(other.weights_, other.weights_ + curr_items_alloc_, weights_);
|
113
115
|
|
114
116
|
if (!as_sketch && other.marks_ != nullptr) {
|
115
|
-
marks_ = AllocBool().allocate(curr_items_alloc_);
|
116
|
-
std::copy(
|
117
|
+
marks_ = AllocBool(allocator_).allocate(curr_items_alloc_);
|
118
|
+
std::copy(other.marks_, other.marks_ + curr_items_alloc_, marks_);
|
117
119
|
}
|
118
120
|
}
|
119
121
|
|
120
122
|
template<typename T, typename S, typename A>
|
121
123
|
var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
|
122
|
-
|
124
|
+
uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator) :
|
123
125
|
k_(k),
|
124
126
|
h_(h_count),
|
125
127
|
m_(0),
|
126
128
|
r_(r_count),
|
127
129
|
n_(n),
|
128
130
|
total_wt_r_(total_wt_r),
|
129
|
-
rf_(DEFAULT_RESIZE_FACTOR),
|
131
|
+
rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
|
130
132
|
curr_items_alloc_(len),
|
131
133
|
filled_data_(n > k),
|
134
|
+
allocator_(allocator),
|
132
135
|
data_(data),
|
133
136
|
weights_(weights),
|
134
137
|
num_marks_in_h_(0),
|
@@ -146,6 +149,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
|
|
146
149
|
rf_(other.rf_),
|
147
150
|
curr_items_alloc_(other.curr_items_alloc_),
|
148
151
|
filled_data_(other.filled_data_),
|
152
|
+
allocator_(other.allocator_),
|
149
153
|
data_(other.data_),
|
150
154
|
weights_(other.weights_),
|
151
155
|
num_marks_in_h_(other.num_marks_in_h_),
|
@@ -157,8 +161,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
|
|
157
161
|
}
|
158
162
|
|
159
163
|
template<typename T, typename S, typename A>
|
160
|
-
var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget) :
|
161
|
-
k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf) {
|
164
|
+
var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator) :
|
165
|
+
k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf), allocator_(allocator) {
|
162
166
|
if (k == 0 || k_ > MAX_K) {
|
163
167
|
throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
|
164
168
|
}
|
@@ -178,7 +182,7 @@ template<typename T, typename S, typename A>
|
|
178
182
|
var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
|
179
183
|
uint32_t curr_items_alloc, bool filled_data, std::unique_ptr<T, items_deleter> items,
|
180
184
|
std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
|
181
|
-
std::unique_ptr<bool, marks_deleter> marks) :
|
185
|
+
std::unique_ptr<bool, marks_deleter> marks, const A& allocator) :
|
182
186
|
k_(k),
|
183
187
|
h_(h),
|
184
188
|
m_(m),
|
@@ -188,6 +192,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32
|
|
188
192
|
rf_(rf),
|
189
193
|
curr_items_alloc_(curr_items_alloc),
|
190
194
|
filled_data_(filled_data),
|
195
|
+
allocator_(allocator),
|
191
196
|
data_(items.release()),
|
192
197
|
weights_(weights.release()),
|
193
198
|
num_marks_in_h_(num_marks_in_h),
|
@@ -202,27 +207,27 @@ var_opt_sketch<T,S,A>::~var_opt_sketch() {
|
|
202
207
|
// destroy everything
|
203
208
|
const size_t num_to_destroy = std::min(k_ + 1, curr_items_alloc_);
|
204
209
|
for (size_t i = 0; i < num_to_destroy; ++i) {
|
205
|
-
|
210
|
+
allocator_.destroy(data_ + i);
|
206
211
|
}
|
207
212
|
} else {
|
208
213
|
// skip gap or anything unused at the end
|
209
214
|
for (size_t i = 0; i < h_; ++i) {
|
210
|
-
|
215
|
+
allocator_.destroy(data_+ i);
|
211
216
|
}
|
212
217
|
|
213
218
|
for (size_t i = h_ + 1; i < h_ + r_ + 1; ++i) {
|
214
|
-
|
219
|
+
allocator_.destroy(data_ + i);
|
215
220
|
}
|
216
221
|
}
|
217
|
-
|
222
|
+
allocator_.deallocate(data_, curr_items_alloc_);
|
218
223
|
}
|
219
224
|
|
220
225
|
if (weights_ != nullptr) {
|
221
|
-
AllocDouble().deallocate(weights_, curr_items_alloc_);
|
226
|
+
AllocDouble(allocator_).deallocate(weights_, curr_items_alloc_);
|
222
227
|
}
|
223
228
|
|
224
229
|
if (marks_ != nullptr) {
|
225
|
-
AllocBool().deallocate(marks_, curr_items_alloc_);
|
230
|
+
AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
|
226
231
|
}
|
227
232
|
}
|
228
233
|
|
@@ -238,6 +243,7 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(const var_opt_sketch& ot
|
|
238
243
|
std::swap(rf_, sk_copy.rf_);
|
239
244
|
std::swap(curr_items_alloc_, sk_copy.curr_items_alloc_);
|
240
245
|
std::swap(filled_data_, sk_copy.filled_data_);
|
246
|
+
std::swap(allocator_, sk_copy.allocator_);
|
241
247
|
std::swap(data_, sk_copy.data_);
|
242
248
|
std::swap(weights_, sk_copy.weights_);
|
243
249
|
std::swap(num_marks_in_h_, sk_copy.num_marks_in_h_);
|
@@ -256,6 +262,7 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other)
|
|
256
262
|
std::swap(rf_, other.rf_);
|
257
263
|
std::swap(curr_items_alloc_, other.curr_items_alloc_);
|
258
264
|
std::swap(filled_data_, other.filled_data_);
|
265
|
+
std::swap(allocator_, other.allocator_);
|
259
266
|
std::swap(data_, other.data_);
|
260
267
|
std::swap(weights_, other.weights_);
|
261
268
|
std::swap(num_marks_in_h_, other.num_marks_in_h_);
|
@@ -327,7 +334,7 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
|
|
327
334
|
num_bytes += (h_ / 8) + (h_ % 8 > 0);
|
328
335
|
}
|
329
336
|
// must iterate over the items
|
330
|
-
for (auto
|
337
|
+
for (auto it: *this)
|
331
338
|
num_bytes += S().size_of_item(it.first);
|
332
339
|
return num_bytes;
|
333
340
|
}
|
@@ -335,7 +342,7 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
|
|
335
342
|
template<typename T, typename S, typename A>
|
336
343
|
std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes) const {
|
337
344
|
const size_t size = header_size_bytes + get_serialized_size_bytes();
|
338
|
-
std::vector<uint8_t, AllocU8<A>> bytes(size);
|
345
|
+
std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
|
339
346
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
340
347
|
uint8_t* end_ptr = ptr + size;
|
341
348
|
|
@@ -352,21 +359,21 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
|
|
352
359
|
// first prelong
|
353
360
|
uint8_t ser_ver(SER_VER);
|
354
361
|
uint8_t family(FAMILY_ID);
|
355
|
-
ptr += copy_to_mem(
|
356
|
-
ptr += copy_to_mem(
|
357
|
-
ptr += copy_to_mem(
|
358
|
-
ptr += copy_to_mem(
|
359
|
-
ptr += copy_to_mem(
|
362
|
+
ptr += copy_to_mem(first_byte, ptr);
|
363
|
+
ptr += copy_to_mem(ser_ver, ptr);
|
364
|
+
ptr += copy_to_mem(family, ptr);
|
365
|
+
ptr += copy_to_mem(flags, ptr);
|
366
|
+
ptr += copy_to_mem(k_, ptr);
|
360
367
|
|
361
368
|
if (!empty) {
|
362
369
|
// second and third prelongs
|
363
|
-
ptr += copy_to_mem(
|
364
|
-
ptr += copy_to_mem(
|
365
|
-
ptr += copy_to_mem(
|
370
|
+
ptr += copy_to_mem(n_, ptr);
|
371
|
+
ptr += copy_to_mem(h_, ptr);
|
372
|
+
ptr += copy_to_mem(r_, ptr);
|
366
373
|
|
367
374
|
// fourth prelong, if needed
|
368
375
|
if (r_ > 0) {
|
369
|
-
ptr += copy_to_mem(
|
376
|
+
ptr += copy_to_mem(total_wt_r_, ptr);
|
370
377
|
}
|
371
378
|
|
372
379
|
// first h_ weights
|
@@ -381,14 +388,14 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
|
|
381
388
|
}
|
382
389
|
|
383
390
|
if ((i & 0x7) == 0x7) {
|
384
|
-
ptr += copy_to_mem(
|
391
|
+
ptr += copy_to_mem(val, ptr);
|
385
392
|
val = 0;
|
386
393
|
}
|
387
394
|
}
|
388
395
|
|
389
396
|
// write out any remaining values
|
390
397
|
if ((h_ & 0x7) > 0) {
|
391
|
-
ptr += copy_to_mem(
|
398
|
+
ptr += copy_to_mem(val, ptr);
|
392
399
|
}
|
393
400
|
}
|
394
401
|
|
@@ -421,25 +428,25 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
|
|
421
428
|
// first prelong
|
422
429
|
const uint8_t ser_ver(SER_VER);
|
423
430
|
const uint8_t family(FAMILY_ID);
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
431
|
+
write(os, first_byte);
|
432
|
+
write(os, ser_ver);
|
433
|
+
write(os, family);
|
434
|
+
write(os, flags);
|
435
|
+
write(os, k_);
|
429
436
|
|
430
437
|
if (!empty) {
|
431
438
|
// second and third prelongs
|
432
|
-
|
433
|
-
|
434
|
-
|
439
|
+
write(os, n_);
|
440
|
+
write(os, h_);
|
441
|
+
write(os, r_);
|
435
442
|
|
436
443
|
// fourth prelong, if needed
|
437
444
|
if (r_ > 0) {
|
438
|
-
|
445
|
+
write(os, total_wt_r_);
|
439
446
|
}
|
440
447
|
|
441
448
|
// write the first h_ weights
|
442
|
-
|
449
|
+
write(os, weights_, h_ * sizeof(double));
|
443
450
|
|
444
451
|
// write the first h_ marks as packed bytes iff we have a gadget
|
445
452
|
if (marks_ != nullptr) {
|
@@ -450,14 +457,14 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
|
|
450
457
|
}
|
451
458
|
|
452
459
|
if ((i & 0x7) == 0x7) {
|
453
|
-
|
460
|
+
write(os, val);
|
454
461
|
val = 0;
|
455
462
|
}
|
456
463
|
}
|
457
464
|
|
458
465
|
// write out any remaining values
|
459
466
|
if ((h_ & 0x7) > 0) {
|
460
|
-
|
467
|
+
write(os, val);
|
461
468
|
}
|
462
469
|
}
|
463
470
|
|
@@ -468,23 +475,23 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
|
|
468
475
|
}
|
469
476
|
|
470
477
|
template<typename T, typename S, typename A>
|
471
|
-
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size) {
|
478
|
+
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
472
479
|
ensure_minimum_memory(size, 8);
|
473
480
|
const char* ptr = static_cast<const char*>(bytes);
|
474
481
|
const char* base = ptr;
|
475
482
|
const char* end_ptr = ptr + size;
|
476
483
|
uint8_t first_byte;
|
477
|
-
ptr += copy_from_mem(ptr,
|
484
|
+
ptr += copy_from_mem(ptr, first_byte);
|
478
485
|
uint8_t preamble_longs = first_byte & 0x3f;
|
479
486
|
resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
480
487
|
uint8_t serial_version;
|
481
|
-
ptr += copy_from_mem(ptr,
|
488
|
+
ptr += copy_from_mem(ptr, serial_version);
|
482
489
|
uint8_t family_id;
|
483
|
-
ptr += copy_from_mem(ptr,
|
490
|
+
ptr += copy_from_mem(ptr, family_id);
|
484
491
|
uint8_t flags;
|
485
|
-
ptr += copy_from_mem(ptr,
|
492
|
+
ptr += copy_from_mem(ptr, flags);
|
486
493
|
uint32_t k;
|
487
|
-
ptr += copy_from_mem(ptr,
|
494
|
+
ptr += copy_from_mem(ptr, k);
|
488
495
|
|
489
496
|
check_preamble_longs(preamble_longs, flags);
|
490
497
|
check_family_and_serialization_version(family_id, serial_version);
|
@@ -494,22 +501,22 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
494
501
|
const bool is_gadget = flags & GADGET_FLAG_MASK;
|
495
502
|
|
496
503
|
if (is_empty) {
|
497
|
-
return var_opt_sketch<T,S,A>(k, rf, is_gadget);
|
504
|
+
return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
|
498
505
|
}
|
499
506
|
|
500
507
|
// second and third prelongs
|
501
508
|
uint64_t n;
|
502
509
|
uint32_t h, r;
|
503
|
-
ptr += copy_from_mem(ptr,
|
504
|
-
ptr += copy_from_mem(ptr,
|
505
|
-
ptr += copy_from_mem(ptr,
|
510
|
+
ptr += copy_from_mem(ptr, n);
|
511
|
+
ptr += copy_from_mem(ptr, h);
|
512
|
+
ptr += copy_from_mem(ptr, r);
|
506
513
|
|
507
514
|
const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
|
508
515
|
|
509
516
|
// current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
|
510
517
|
double total_wt_r = 0.0;
|
511
518
|
if (preamble_longs == PREAMBLE_LONGS_FULL) {
|
512
|
-
ptr += copy_from_mem(ptr,
|
519
|
+
ptr += copy_from_mem(ptr, total_wt_r);
|
513
520
|
if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
|
514
521
|
throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
|
515
522
|
"Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
|
@@ -520,7 +527,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
520
527
|
|
521
528
|
// read the first h_ weights, fill in rest of array with -1.0
|
522
529
|
check_memory_size(ptr - base + (h * sizeof(double)), size);
|
523
|
-
std::unique_ptr<double, weights_deleter> weights(AllocDouble().allocate(array_size),
|
530
|
+
std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
|
531
|
+
weights_deleter(array_size, allocator));
|
524
532
|
double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
|
525
533
|
ptr += copy_from_mem(ptr, wts, h * sizeof(double));
|
526
534
|
for (size_t i = 0; i < h; ++i) {
|
@@ -528,19 +536,19 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
528
536
|
throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
|
529
537
|
}
|
530
538
|
}
|
531
|
-
std::fill(
|
539
|
+
std::fill(wts + h, wts + array_size, -1.0);
|
532
540
|
|
533
541
|
// read the first h_ marks as packed bytes iff we have a gadget
|
534
542
|
uint32_t num_marks_in_h = 0;
|
535
|
-
std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size));
|
543
|
+
std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size, allocator));
|
536
544
|
if (is_gadget) {
|
537
545
|
uint8_t val = 0;
|
538
|
-
marks = std::unique_ptr<bool, marks_deleter>(AllocBool().allocate(array_size), marks_deleter(array_size));
|
546
|
+
marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
|
539
547
|
const size_t size_marks = (h / 8) + (h % 8 > 0 ? 1 : 0);
|
540
548
|
check_memory_size(ptr - base + size_marks, size);
|
541
549
|
for (uint32_t i = 0; i < h; ++i) {
|
542
550
|
if ((i & 0x7) == 0x0) { // should trigger on first iteration
|
543
|
-
ptr += copy_from_mem(ptr,
|
551
|
+
ptr += copy_from_mem(ptr, val);
|
544
552
|
}
|
545
553
|
marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
|
546
554
|
num_marks_in_h += (marks.get()[i] ? 1 : 0);
|
@@ -548,8 +556,8 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
548
556
|
}
|
549
557
|
|
550
558
|
// read the sample items, skipping the gap. Either h_ or r_ may be 0
|
551
|
-
items_deleter deleter(array_size);
|
552
|
-
std::unique_ptr<T, items_deleter> items(A().allocate(array_size), deleter);
|
559
|
+
items_deleter deleter(array_size, allocator);
|
560
|
+
std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
|
553
561
|
|
554
562
|
ptr += S().deserialize(ptr, end_ptr - ptr, items.get(), h);
|
555
563
|
items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
|
@@ -558,23 +566,18 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
558
566
|
items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
|
559
567
|
|
560
568
|
return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
|
561
|
-
std::move(items), std::move(weights), num_marks_in_h, std::move(marks));
|
569
|
+
std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
|
562
570
|
}
|
563
571
|
|
564
572
|
template<typename T, typename S, typename A>
|
565
|
-
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
|
566
|
-
|
567
|
-
is.read((char*)&first_byte, sizeof(first_byte));
|
573
|
+
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
|
574
|
+
const auto first_byte = read<uint8_t>(is);
|
568
575
|
uint8_t preamble_longs = first_byte & 0x3f;
|
569
|
-
resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
570
|
-
|
571
|
-
|
572
|
-
uint8_t
|
573
|
-
|
574
|
-
uint8_t flags;
|
575
|
-
is.read((char*)&flags, sizeof(flags));
|
576
|
-
uint32_t k;
|
577
|
-
is.read((char*)&k, sizeof(k));
|
576
|
+
const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
577
|
+
const auto serial_version = read<uint8_t>(is);
|
578
|
+
const auto family_id = read<uint8_t>(is);
|
579
|
+
const auto flags = read<uint8_t>(is);
|
580
|
+
const auto k = read<uint32_t>(is);
|
578
581
|
|
579
582
|
check_preamble_longs(preamble_longs, flags);
|
580
583
|
check_family_and_serialization_version(family_id, serial_version);
|
@@ -586,50 +589,47 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
|
|
586
589
|
if (!is.good())
|
587
590
|
throw std::runtime_error("error reading from std::istream");
|
588
591
|
else
|
589
|
-
return var_opt_sketch<T,S,A>(k, rf, is_gadget);
|
592
|
+
return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
|
590
593
|
}
|
591
594
|
|
592
595
|
// second and third prelongs
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
is.read((char*)&h, sizeof(h));
|
597
|
-
is.read((char*)&r, sizeof(r));
|
596
|
+
const auto n = read<uint64_t>(is);
|
597
|
+
const auto h = read<uint32_t>(is);
|
598
|
+
const auto r = read<uint32_t>(is);
|
598
599
|
|
599
600
|
const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
|
600
601
|
|
601
602
|
// current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
|
602
603
|
double total_wt_r = 0.0;
|
603
604
|
if (preamble_longs == PREAMBLE_LONGS_FULL) {
|
604
|
-
|
605
|
+
total_wt_r = read<double>(is);
|
605
606
|
if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
|
606
607
|
throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
|
607
608
|
"Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
|
608
609
|
}
|
609
|
-
} else {
|
610
|
-
total_wt_r = 0.0;
|
611
610
|
}
|
612
611
|
|
613
612
|
// read the first h weights, fill remainder with -1.0
|
614
|
-
std::unique_ptr<double, weights_deleter> weights(AllocDouble().allocate(array_size),
|
613
|
+
std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
|
614
|
+
weights_deleter(array_size, allocator));
|
615
615
|
double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
|
616
|
-
|
616
|
+
read(is, wts, h * sizeof(double));
|
617
617
|
for (size_t i = 0; i < h; ++i) {
|
618
618
|
if (!(wts[i] > 0.0)) {
|
619
619
|
throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
|
620
620
|
}
|
621
621
|
}
|
622
|
-
std::fill(
|
622
|
+
std::fill(wts + h, wts + array_size, -1.0);
|
623
623
|
|
624
624
|
// read the first h_ marks as packed bytes iff we have a gadget
|
625
625
|
uint32_t num_marks_in_h = 0;
|
626
|
-
std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size));
|
626
|
+
std::unique_ptr<bool, marks_deleter> marks(nullptr, marks_deleter(array_size, allocator));
|
627
627
|
if (is_gadget) {
|
628
|
-
marks = std::unique_ptr<bool, marks_deleter>(AllocBool().allocate(array_size), marks_deleter(array_size));
|
628
|
+
marks = std::unique_ptr<bool, marks_deleter>(AllocBool(allocator).allocate(array_size), marks_deleter(array_size, allocator));
|
629
629
|
uint8_t val = 0;
|
630
630
|
for (uint32_t i = 0; i < h; ++i) {
|
631
631
|
if ((i & 0x7) == 0x0) { // should trigger on first iteration
|
632
|
-
|
632
|
+
val = read<uint8_t>(is);
|
633
633
|
}
|
634
634
|
marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
|
635
635
|
num_marks_in_h += (marks.get()[i] ? 1 : 0);
|
@@ -637,12 +637,12 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
|
|
637
637
|
}
|
638
638
|
|
639
639
|
// read the sample items, skipping the gap. Either h or r may be 0
|
640
|
-
items_deleter deleter(array_size);
|
641
|
-
std::unique_ptr<T, items_deleter> items(A().allocate(array_size), deleter);
|
642
|
-
|
640
|
+
items_deleter deleter(array_size, allocator);
|
641
|
+
std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
|
642
|
+
|
643
643
|
S().deserialize(is, items.get(), h); // aka &data_[0]
|
644
644
|
items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
|
645
|
-
|
645
|
+
|
646
646
|
S().deserialize(is, &(items.get()[h + 1]), r);
|
647
647
|
items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
|
648
648
|
|
@@ -650,7 +650,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is) {
|
|
650
650
|
throw std::runtime_error("error reading from std::istream");
|
651
651
|
|
652
652
|
return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
|
653
|
-
std::move(items), std::move(weights), num_marks_in_h, std::move(marks));
|
653
|
+
std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
|
654
654
|
}
|
655
655
|
|
656
656
|
template<typename T, typename S, typename A>
|
@@ -672,24 +672,24 @@ void var_opt_sketch<T,S,A>::reset() {
|
|
672
672
|
// destroy everything
|
673
673
|
const size_t num_to_destroy = std::min(k_ + 1, prev_alloc);
|
674
674
|
for (size_t i = 0; i < num_to_destroy; ++i)
|
675
|
-
|
675
|
+
allocator_.destroy(data_ + i);
|
676
676
|
} else {
|
677
677
|
// skip gap or anything unused at the end
|
678
678
|
for (size_t i = 0; i < h_; ++i)
|
679
|
-
|
679
|
+
allocator_.destroy(data_+ i);
|
680
680
|
|
681
681
|
for (size_t i = h_ + 1; i < h_ + r_ + 1; ++i)
|
682
|
-
|
682
|
+
allocator_.destroy(data_ + i);
|
683
683
|
}
|
684
684
|
|
685
685
|
if (curr_items_alloc_ < prev_alloc) {
|
686
686
|
const bool is_gadget = (marks_ != nullptr);
|
687
687
|
|
688
|
-
|
689
|
-
AllocDouble().deallocate(weights_, prev_alloc);
|
688
|
+
allocator_.deallocate(data_, prev_alloc);
|
689
|
+
AllocDouble(allocator_).deallocate(weights_, prev_alloc);
|
690
690
|
|
691
691
|
if (marks_ != nullptr)
|
692
|
-
AllocBool().deallocate(marks_, prev_alloc);
|
692
|
+
AllocBool(allocator_).deallocate(marks_, prev_alloc);
|
693
693
|
|
694
694
|
allocate_data_arrays(curr_items_alloc_, is_gadget);
|
695
695
|
}
|
@@ -970,11 +970,11 @@ template<typename T, typename S, typename A>
|
|
970
970
|
void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_marks) {
|
971
971
|
filled_data_ = false;
|
972
972
|
|
973
|
-
data_ =
|
974
|
-
weights_ = AllocDouble().allocate(tgt_size);
|
973
|
+
data_ = allocator_.allocate(tgt_size);
|
974
|
+
weights_ = AllocDouble(allocator_).allocate(tgt_size);
|
975
975
|
|
976
976
|
if (use_marks) {
|
977
|
-
marks_ = AllocBool().allocate(tgt_size);
|
977
|
+
marks_ = AllocBool(allocator_).allocate(tgt_size);
|
978
978
|
} else {
|
979
979
|
marks_ = nullptr;
|
980
980
|
}
|
@@ -991,27 +991,27 @@ void var_opt_sketch<T,S,A>::grow_data_arrays() {
|
|
991
991
|
if (prev_size < curr_items_alloc_) {
|
992
992
|
filled_data_ = false;
|
993
993
|
|
994
|
-
T* tmp_data =
|
995
|
-
double* tmp_weights = AllocDouble().allocate(curr_items_alloc_);
|
994
|
+
T* tmp_data = allocator_.allocate(curr_items_alloc_);
|
995
|
+
double* tmp_weights = AllocDouble(allocator_).allocate(curr_items_alloc_);
|
996
996
|
|
997
997
|
for (uint32_t i = 0; i < prev_size; ++i) {
|
998
998
|
new (&tmp_data[i]) T(std::move(data_[i]));
|
999
|
-
|
999
|
+
allocator_.destroy(data_ + i);
|
1000
1000
|
tmp_weights[i] = weights_[i];
|
1001
1001
|
}
|
1002
1002
|
|
1003
|
-
|
1004
|
-
AllocDouble().deallocate(weights_, prev_size);
|
1003
|
+
allocator_.deallocate(data_, prev_size);
|
1004
|
+
AllocDouble(allocator_).deallocate(weights_, prev_size);
|
1005
1005
|
|
1006
1006
|
data_ = tmp_data;
|
1007
1007
|
weights_ = tmp_weights;
|
1008
1008
|
|
1009
1009
|
if (marks_ != nullptr) {
|
1010
|
-
bool* tmp_marks = AllocBool().allocate(curr_items_alloc_);
|
1010
|
+
bool* tmp_marks = AllocBool(allocator_).allocate(curr_items_alloc_);
|
1011
1011
|
for (uint32_t i = 0; i < prev_size; ++i) {
|
1012
1012
|
tmp_marks[i] = marks_[i];
|
1013
1013
|
}
|
1014
|
-
AllocBool().deallocate(marks_, prev_size);
|
1014
|
+
AllocBool(allocator_).deallocate(marks_, prev_size);
|
1015
1015
|
marks_ = tmp_marks;
|
1016
1016
|
}
|
1017
1017
|
}
|
@@ -1296,7 +1296,7 @@ template<typename T, typename S, typename A>
|
|
1296
1296
|
void var_opt_sketch<T,S,A>::strip_marks() {
|
1297
1297
|
if (marks_ == nullptr) throw std::logic_error("request to strip marks from non-gadget");
|
1298
1298
|
num_marks_in_h_ = 0;
|
1299
|
-
AllocBool().deallocate(marks_, curr_items_alloc_);
|
1299
|
+
AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
|
1300
1300
|
marks_ = nullptr;
|
1301
1301
|
}
|
1302
1302
|
|
@@ -1411,7 +1411,7 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
|
|
1411
1411
|
if (effective_sampling_rate < 0.0 || effective_sampling_rate > 1.0)
|
1412
1412
|
throw std::logic_error("invalid sampling rate outside [0.0, 1.0]");
|
1413
1413
|
|
1414
|
-
|
1414
|
+
uint32_t r_true_count = 0;
|
1415
1415
|
++idx; // skip the gap
|
1416
1416
|
for (; idx < (k_ + 1); ++idx) {
|
1417
1417
|
if (predicate(data_[idx])) {
|
@@ -1433,10 +1433,10 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
|
|
1433
1433
|
template<typename T, typename S, typename A>
|
1434
1434
|
class var_opt_sketch<T, S, A>::items_deleter {
|
1435
1435
|
public:
|
1436
|
-
items_deleter(uint32_t num) : num(num), h_count(0), r_count(0) {}
|
1436
|
+
items_deleter(uint32_t num, const A& allocator) : num(num), h_count(0), r_count(0), allocator(allocator) {}
|
1437
1437
|
void set_h(uint32_t h) { h_count = h; }
|
1438
1438
|
void set_r(uint32_t r) { r_count = r; }
|
1439
|
-
void operator() (T* ptr)
|
1439
|
+
void operator() (T* ptr) {
|
1440
1440
|
if (h_count > 0) {
|
1441
1441
|
for (size_t i = 0; i < h_count; ++i) {
|
1442
1442
|
ptr[i].~T();
|
@@ -1449,39 +1449,42 @@ class var_opt_sketch<T, S, A>::items_deleter {
|
|
1449
1449
|
}
|
1450
1450
|
}
|
1451
1451
|
if (ptr != nullptr) {
|
1452
|
-
|
1452
|
+
allocator.deallocate(ptr, num);
|
1453
1453
|
}
|
1454
1454
|
}
|
1455
1455
|
private:
|
1456
1456
|
uint32_t num;
|
1457
1457
|
uint32_t h_count;
|
1458
1458
|
uint32_t r_count;
|
1459
|
+
A allocator;
|
1459
1460
|
};
|
1460
1461
|
|
1461
1462
|
template<typename T, typename S, typename A>
|
1462
1463
|
class var_opt_sketch<T, S, A>::weights_deleter {
|
1463
1464
|
public:
|
1464
|
-
weights_deleter(uint32_t num) : num(num) {}
|
1465
|
-
void operator() (double* ptr)
|
1465
|
+
weights_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
|
1466
|
+
void operator() (double* ptr) {
|
1466
1467
|
if (ptr != nullptr) {
|
1467
|
-
|
1468
|
+
allocator.deallocate(ptr, num);
|
1468
1469
|
}
|
1469
1470
|
}
|
1470
1471
|
private:
|
1471
1472
|
uint32_t num;
|
1473
|
+
AllocDouble allocator;
|
1472
1474
|
};
|
1473
1475
|
|
1474
1476
|
template<typename T, typename S, typename A>
|
1475
1477
|
class var_opt_sketch<T, S, A>::marks_deleter {
|
1476
1478
|
public:
|
1477
|
-
marks_deleter(uint32_t num) : num(num) {}
|
1478
|
-
void operator() (bool* ptr)
|
1479
|
+
marks_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
|
1480
|
+
void operator() (bool* ptr) {
|
1479
1481
|
if (ptr != nullptr) {
|
1480
|
-
|
1482
|
+
allocator.deallocate(ptr, 1);
|
1481
1483
|
}
|
1482
1484
|
}
|
1483
1485
|
private:
|
1484
1486
|
uint32_t num;
|
1487
|
+
AllocBool allocator;
|
1485
1488
|
};
|
1486
1489
|
|
1487
1490
|
|