datasketches 0.2.2 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +8 -8
- data/ext/datasketches/kll_wrapper.cpp +5 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
- data/vendor/datasketches-cpp/python/README.md +57 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
- data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
- data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
- metadata +34 -12
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -51,18 +51,27 @@ struct subset_summary {
|
|
|
51
51
|
double total_sketch_weight;
|
|
52
52
|
};
|
|
53
53
|
|
|
54
|
-
enum resize_factor { X1 = 0, X2, X4, X8 };
|
|
55
|
-
|
|
56
54
|
template <typename T, typename S, typename A> class var_opt_union; // forward declaration
|
|
57
55
|
|
|
58
|
-
|
|
56
|
+
namespace var_opt_constants {
|
|
57
|
+
const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
|
|
58
|
+
const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
template<
|
|
62
|
+
typename T,
|
|
63
|
+
typename S = serde<T>, // deprecated, to be removed in the next major version
|
|
64
|
+
typename A = std::allocator<T>
|
|
65
|
+
>
|
|
59
66
|
class var_opt_sketch {
|
|
60
67
|
|
|
61
68
|
public:
|
|
62
|
-
static const resize_factor DEFAULT_RESIZE_FACTOR =
|
|
63
|
-
static const uint32_t MAX_K =
|
|
69
|
+
static const resize_factor DEFAULT_RESIZE_FACTOR = var_opt_constants::DEFAULT_RESIZE_FACTOR;
|
|
70
|
+
static const uint32_t MAX_K = var_opt_constants::MAX_K;
|
|
64
71
|
|
|
65
|
-
explicit var_opt_sketch(uint32_t k,
|
|
72
|
+
explicit var_opt_sketch(uint32_t k,
|
|
73
|
+
resize_factor rf = var_opt_constants::DEFAULT_RESIZE_FACTOR,
|
|
74
|
+
const A& allocator = A());
|
|
66
75
|
var_opt_sketch(const var_opt_sketch& other);
|
|
67
76
|
var_opt_sketch(var_opt_sketch&& other) noexcept;
|
|
68
77
|
|
|
@@ -130,18 +139,20 @@ class var_opt_sketch {
|
|
|
130
139
|
/**
|
|
131
140
|
* Computes size needed to serialize the current state of the sketch.
|
|
132
141
|
* This version is for fixed-size arithmetic types (integral and floating point).
|
|
142
|
+
* @param instance of a SerDe
|
|
133
143
|
* @return size in bytes needed to serialize this sketch
|
|
134
144
|
*/
|
|
135
|
-
template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
|
136
|
-
inline size_t get_serialized_size_bytes() const;
|
|
145
|
+
template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
|
146
|
+
inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
137
147
|
|
|
138
148
|
/**
|
|
139
149
|
* Computes size needed to serialize the current state of the sketch.
|
|
140
150
|
* This version is for all other types and can be expensive since every item needs to be looked at.
|
|
151
|
+
* @param instance of a SerDe
|
|
141
152
|
* @return size in bytes needed to serialize this sketch
|
|
142
153
|
*/
|
|
143
|
-
template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
|
144
|
-
inline size_t get_serialized_size_bytes() const;
|
|
154
|
+
template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
|
155
|
+
inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
145
156
|
|
|
146
157
|
// This is a convenience alias for users
|
|
147
158
|
// The type returned by the following serialize method
|
|
@@ -153,30 +164,61 @@ class var_opt_sketch {
|
|
|
153
164
|
* It is a blank space of a given size.
|
|
154
165
|
* This header is used in Datasketches PostgreSQL extension.
|
|
155
166
|
* @param header_size_bytes space to reserve in front of the sketch
|
|
167
|
+
* @param instance of a SerDe
|
|
156
168
|
*/
|
|
157
|
-
|
|
169
|
+
template<typename SerDe = S>
|
|
170
|
+
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
|
158
171
|
|
|
159
172
|
/**
|
|
160
173
|
* This method serializes the sketch into a given stream in a binary form
|
|
161
174
|
* @param os output stream
|
|
175
|
+
* @param instance of a SerDe
|
|
162
176
|
*/
|
|
163
|
-
|
|
177
|
+
template<typename SerDe = S>
|
|
178
|
+
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
|
164
179
|
|
|
165
180
|
/**
|
|
166
181
|
* This method deserializes a sketch from a given stream.
|
|
167
182
|
* @param is input stream
|
|
183
|
+
* @param instance of an Allocator
|
|
168
184
|
* @return an instance of a sketch
|
|
185
|
+
*
|
|
186
|
+
* Deprecated, to be removed in the next major version
|
|
169
187
|
*/
|
|
170
188
|
static var_opt_sketch deserialize(std::istream& is, const A& allocator = A());
|
|
171
189
|
|
|
190
|
+
/**
|
|
191
|
+
* This method deserializes a sketch from a given stream.
|
|
192
|
+
* @param is input stream
|
|
193
|
+
* @param instance of a SerDe
|
|
194
|
+
* @param instance of an Allocator
|
|
195
|
+
* @return an instance of a sketch
|
|
196
|
+
*/
|
|
197
|
+
template<typename SerDe = S>
|
|
198
|
+
static var_opt_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
|
|
199
|
+
|
|
172
200
|
/**
|
|
173
201
|
* This method deserializes a sketch from a given array of bytes.
|
|
174
202
|
* @param bytes pointer to the array of bytes
|
|
175
203
|
* @param size the size of the array
|
|
204
|
+
* @param instance of an Allocator
|
|
176
205
|
* @return an instance of a sketch
|
|
206
|
+
*
|
|
207
|
+
* Deprecated, to be removed in the next major version
|
|
177
208
|
*/
|
|
178
209
|
static var_opt_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
|
|
179
210
|
|
|
211
|
+
/**
|
|
212
|
+
* This method deserializes a sketch from a given array of bytes.
|
|
213
|
+
* @param bytes pointer to the array of bytes
|
|
214
|
+
* @param size the size of the array
|
|
215
|
+
* @param instance of a SerDe
|
|
216
|
+
* @param instance of an Allocator
|
|
217
|
+
* @return an instance of a sketch
|
|
218
|
+
*/
|
|
219
|
+
template<typename SerDe = S>
|
|
220
|
+
static var_opt_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
|
|
221
|
+
|
|
180
222
|
/**
|
|
181
223
|
* Prints a summary of the sketch.
|
|
182
224
|
* @return the summary as a string
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
#include <cmath>
|
|
26
26
|
#include <random>
|
|
27
27
|
#include <algorithm>
|
|
28
|
+
#include <stdexcept>
|
|
28
29
|
|
|
29
30
|
#include "var_opt_sketch.hpp"
|
|
30
31
|
#include "serde.hpp"
|
|
@@ -128,7 +129,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
|
|
|
128
129
|
r_(r_count),
|
|
129
130
|
n_(n),
|
|
130
131
|
total_wt_r_(total_wt_r),
|
|
131
|
-
rf_(DEFAULT_RESIZE_FACTOR),
|
|
132
|
+
rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
|
|
132
133
|
curr_items_alloc_(len),
|
|
133
134
|
filled_data_(n > k),
|
|
134
135
|
allocator_(allocator),
|
|
@@ -311,8 +312,8 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other)
|
|
|
311
312
|
|
|
312
313
|
// implementation for fixed-size arithmetic types (integral and floating point)
|
|
313
314
|
template<typename T, typename S, typename A>
|
|
314
|
-
template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
|
315
|
-
size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
|
|
315
|
+
template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
|
316
|
+
size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe&) const {
|
|
316
317
|
if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
|
|
317
318
|
size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
|
|
318
319
|
num_bytes += h_ * sizeof(double); // weights
|
|
@@ -325,8 +326,8 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
|
|
|
325
326
|
|
|
326
327
|
// implementation for all other types
|
|
327
328
|
template<typename T, typename S, typename A>
|
|
328
|
-
template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
|
329
|
-
size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
|
|
329
|
+
template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
|
330
|
+
size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
|
|
330
331
|
if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
|
|
331
332
|
size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
|
|
332
333
|
num_bytes += h_ * sizeof(double); // weights
|
|
@@ -335,13 +336,14 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
|
|
|
335
336
|
}
|
|
336
337
|
// must iterate over the items
|
|
337
338
|
for (auto it: *this)
|
|
338
|
-
num_bytes +=
|
|
339
|
+
num_bytes += sd.size_of_item(it.first);
|
|
339
340
|
return num_bytes;
|
|
340
341
|
}
|
|
341
342
|
|
|
342
343
|
template<typename T, typename S, typename A>
|
|
343
|
-
|
|
344
|
-
|
|
344
|
+
template<typename SerDe>
|
|
345
|
+
std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
|
|
346
|
+
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
|
|
345
347
|
std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
|
|
346
348
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
347
349
|
uint8_t* end_ptr = ptr + size;
|
|
@@ -400,8 +402,8 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
|
|
|
400
402
|
}
|
|
401
403
|
|
|
402
404
|
// write the sample items, skipping the gap. Either h_ or r_ may be 0
|
|
403
|
-
ptr +=
|
|
404
|
-
ptr +=
|
|
405
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, data_, h_);
|
|
406
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, &data_[h_ + 1], r_);
|
|
405
407
|
}
|
|
406
408
|
|
|
407
409
|
size_t bytes_written = ptr - bytes.data();
|
|
@@ -413,7 +415,8 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
|
|
|
413
415
|
}
|
|
414
416
|
|
|
415
417
|
template<typename T, typename S, typename A>
|
|
416
|
-
|
|
418
|
+
template<typename SerDe>
|
|
419
|
+
void var_opt_sketch<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
417
420
|
const bool empty = (h_ == 0) && (r_ == 0);
|
|
418
421
|
|
|
419
422
|
const uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
|
|
@@ -469,13 +472,19 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
|
|
|
469
472
|
}
|
|
470
473
|
|
|
471
474
|
// write the sample items, skipping the gap. Either h_ or r_ may be 0
|
|
472
|
-
|
|
473
|
-
|
|
475
|
+
sd.serialize(os, data_, h_);
|
|
476
|
+
sd.serialize(os, &data_[h_ + 1], r_);
|
|
474
477
|
}
|
|
475
478
|
}
|
|
476
479
|
|
|
477
480
|
template<typename T, typename S, typename A>
|
|
478
481
|
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
|
482
|
+
return deserialize(bytes, size, S(), allocator);
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
template<typename T, typename S, typename A>
|
|
486
|
+
template<typename SerDe>
|
|
487
|
+
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
|
|
479
488
|
ensure_minimum_memory(size, 8);
|
|
480
489
|
const char* ptr = static_cast<const char*>(bytes);
|
|
481
490
|
const char* base = ptr;
|
|
@@ -559,10 +568,10 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
|
559
568
|
items_deleter deleter(array_size, allocator);
|
|
560
569
|
std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
|
|
561
570
|
|
|
562
|
-
ptr +=
|
|
571
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, items.get(), h);
|
|
563
572
|
items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
|
|
564
573
|
|
|
565
|
-
ptr +=
|
|
574
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, &(items.get()[h + 1]), r);
|
|
566
575
|
items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
|
|
567
576
|
|
|
568
577
|
return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
|
|
@@ -571,6 +580,12 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
|
571
580
|
|
|
572
581
|
template<typename T, typename S, typename A>
|
|
573
582
|
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
|
|
583
|
+
return deserialize(is, S(), allocator);
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
template<typename T, typename S, typename A>
|
|
587
|
+
template<typename SerDe>
|
|
588
|
+
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
|
|
574
589
|
const auto first_byte = read<uint8_t>(is);
|
|
575
590
|
uint8_t preamble_longs = first_byte & 0x3f;
|
|
576
591
|
const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
|
@@ -640,10 +655,10 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
|
|
|
640
655
|
items_deleter deleter(array_size, allocator);
|
|
641
656
|
std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
|
|
642
657
|
|
|
643
|
-
|
|
658
|
+
sd.deserialize(is, items.get(), h); // aka &data_[0]
|
|
644
659
|
items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
|
|
645
660
|
|
|
646
|
-
|
|
661
|
+
sd.deserialize(is, &(items.get()[h + 1]), r);
|
|
647
662
|
items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
|
|
648
663
|
|
|
649
664
|
if (!is.good())
|
|
@@ -731,8 +746,10 @@ void var_opt_sketch<T,S,A>::update(T&& item, double weight) {
|
|
|
731
746
|
|
|
732
747
|
template<typename T, typename S, typename A>
|
|
733
748
|
string<A> var_opt_sketch<T,S,A>::to_string() const {
|
|
734
|
-
|
|
735
|
-
|
|
749
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
750
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
751
|
+
std::ostringstream os;
|
|
752
|
+
os << "### VarOpt SUMMARY:" << std::endl;
|
|
736
753
|
os << " k : " << k_ << std::endl;
|
|
737
754
|
os << " h : " << h_ << std::endl;
|
|
738
755
|
os << " r : " << r_ << std::endl;
|
|
@@ -740,24 +757,28 @@ string<A> var_opt_sketch<T,S,A>::to_string() const {
|
|
|
740
757
|
os << " Current size : " << curr_items_alloc_ << std::endl;
|
|
741
758
|
os << " Resize factor: " << (1 << rf_) << std::endl;
|
|
742
759
|
os << "### END SKETCH SUMMARY" << std::endl;
|
|
743
|
-
return os.str();
|
|
760
|
+
return string<A>(os.str().c_str(), allocator_);
|
|
744
761
|
}
|
|
745
762
|
|
|
746
763
|
template<typename T, typename S, typename A>
|
|
747
764
|
string<A> var_opt_sketch<T,S,A>::items_to_string() const {
|
|
748
|
-
|
|
765
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
766
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
767
|
+
std::ostringstream os;
|
|
749
768
|
os << "### Sketch Items" << std::endl;
|
|
750
769
|
int idx = 0;
|
|
751
770
|
for (auto record : *this) {
|
|
752
771
|
os << idx << ": " << record.first << "\twt = " << record.second << std::endl;
|
|
753
772
|
++idx;
|
|
754
773
|
}
|
|
755
|
-
return os.str();
|
|
774
|
+
return string<A>(os.str().c_str(), allocator_);
|
|
756
775
|
}
|
|
757
776
|
|
|
758
777
|
template<typename T, typename S, typename A>
|
|
759
778
|
string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
|
|
760
|
-
|
|
779
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
780
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
781
|
+
std::ostringstream os;
|
|
761
782
|
os << "### Sketch Items" << std::endl;
|
|
762
783
|
const uint32_t array_length = (n_ < k_ ? n_ : k_ + 1);
|
|
763
784
|
for (uint32_t i = 0, display_idx = 0; i < array_length; ++i) {
|
|
@@ -774,7 +795,7 @@ string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
|
|
|
774
795
|
++display_idx;
|
|
775
796
|
}
|
|
776
797
|
}
|
|
777
|
-
return os.str();
|
|
798
|
+
return string<A>(os.str().c_str(), allocator_);
|
|
778
799
|
}
|
|
779
800
|
|
|
780
801
|
template<typename T, typename S, typename A>
|
|
@@ -1677,16 +1698,6 @@ bool var_opt_sketch<T, S, A>::iterator::get_mark() const {
|
|
|
1677
1698
|
return sk_->marks_ == nullptr ? false : sk_->marks_[idx_];
|
|
1678
1699
|
}
|
|
1679
1700
|
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
// ******************** MOVE TO COMMON UTILS AREA EVENTUALLY *********************
|
|
1683
|
-
|
|
1684
|
-
namespace random_utils {
|
|
1685
|
-
static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
|
|
1686
|
-
static std::mt19937_64 rand(rd());
|
|
1687
|
-
static std::uniform_real_distribution<> next_double(0.0, 1.0);
|
|
1688
|
-
}
|
|
1689
|
-
|
|
1690
1701
|
/**
|
|
1691
1702
|
* Checks if target sampling allocation is more than 50% of max sampling size.
|
|
1692
1703
|
* If so, returns max sampling size, otherwise passes through target size.
|
|
@@ -45,7 +45,11 @@ template<typename A> using AllocU8 = typename std::allocator_traits<A>::template
|
|
|
45
45
|
* author Kevin Lang
|
|
46
46
|
* author Jon Malkin
|
|
47
47
|
*/
|
|
48
|
-
template
|
|
48
|
+
template<
|
|
49
|
+
typename T,
|
|
50
|
+
typename S = serde<T>, // deprecated, to be removed in the next major version
|
|
51
|
+
typename A = std::allocator<T>
|
|
52
|
+
>
|
|
49
53
|
class var_opt_union {
|
|
50
54
|
|
|
51
55
|
public:
|
|
@@ -88,14 +92,16 @@ public:
|
|
|
88
92
|
/**
|
|
89
93
|
* Computes size needed to serialize the current state of the union.
|
|
90
94
|
* This version is for all other types and can be expensive since every item needs to be looked at.
|
|
95
|
+
* @param instance of a SerDe
|
|
91
96
|
* @return size in bytes needed to serialize this sketch
|
|
92
97
|
*/
|
|
93
|
-
|
|
94
|
-
|
|
98
|
+
template<typename SerDe = S>
|
|
99
|
+
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
100
|
+
|
|
95
101
|
// This is a convenience alias for users
|
|
96
102
|
// The type returned by the following serialize method
|
|
97
103
|
typedef vector_u8<A> vector_bytes;
|
|
98
|
-
|
|
104
|
+
|
|
99
105
|
/**
|
|
100
106
|
* NOTE: This method may be deprecated in a future version.
|
|
101
107
|
* This method serializes the sketch as a vector of bytes.
|
|
@@ -103,33 +109,62 @@ public:
|
|
|
103
109
|
* It is a blank space of a given size.
|
|
104
110
|
* This header is used in Datasketches PostgreSQL extension.
|
|
105
111
|
* @param header_size_bytes space to reserve in front of the sketch
|
|
112
|
+
* @param instance of a SerDe
|
|
106
113
|
*/
|
|
107
|
-
|
|
114
|
+
template<typename SerDe = S>
|
|
115
|
+
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
|
108
116
|
|
|
109
117
|
/**
|
|
110
118
|
* NOTE: This method may be deprecated in a future version.
|
|
111
119
|
* This method serializes the sketch into a given stream in a binary form
|
|
112
120
|
* @param os output stream
|
|
121
|
+
* @param instance of a SerDe
|
|
113
122
|
*/
|
|
114
|
-
|
|
123
|
+
template<typename SerDe = S>
|
|
124
|
+
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
|
115
125
|
|
|
116
126
|
/**
|
|
117
127
|
* NOTE: This method may be deprecated in a future version.
|
|
118
128
|
* This method deserializes a union from a given stream.
|
|
119
129
|
* @param is input stream
|
|
130
|
+
* @param instance of an Allocator
|
|
120
131
|
* @return an instance of a union
|
|
121
132
|
*/
|
|
122
133
|
static var_opt_union deserialize(std::istream& is, const A& allocator = A());
|
|
123
134
|
|
|
135
|
+
/**
|
|
136
|
+
* NOTE: This method may be deprecated in a future version.
|
|
137
|
+
* This method deserializes a union from a given stream.
|
|
138
|
+
* @param is input stream
|
|
139
|
+
* @param instance of a SerDe
|
|
140
|
+
* @param instance of an Allocator
|
|
141
|
+
* @return an instance of a union
|
|
142
|
+
*/
|
|
143
|
+
template<typename SerDe = S>
|
|
144
|
+
static var_opt_union deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
|
|
145
|
+
|
|
124
146
|
/**
|
|
125
147
|
* NOTE: This method may be deprecated in a future version.
|
|
126
148
|
* This method deserializes a union from a given array of bytes.
|
|
127
149
|
* @param bytes pointer to the array of bytes
|
|
128
150
|
* @param size the size of the array
|
|
151
|
+
* @param instance of an Allocator
|
|
129
152
|
* @return an instance of a union
|
|
130
153
|
*/
|
|
131
154
|
static var_opt_union deserialize(const void* bytes, size_t size, const A& allocator = A());
|
|
132
155
|
|
|
156
|
+
/**
|
|
157
|
+
* NOTE: This method may be deprecated in a future version.
|
|
158
|
+
* This method deserializes a union from a given array of bytes.
|
|
159
|
+
* @param bytes pointer to the array of bytes
|
|
160
|
+
* @param size the size of the array
|
|
161
|
+
* @param instance of a SerDe
|
|
162
|
+
* @param instance of an Allocator
|
|
163
|
+
* @return an instance of a union
|
|
164
|
+
*/
|
|
165
|
+
template<typename SerDe = S>
|
|
166
|
+
static var_opt_union deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
|
|
167
|
+
|
|
133
168
|
/**
|
|
134
169
|
* Prints a summary of the union as a string.
|
|
135
170
|
* @return the summary as a string
|
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
|
|
25
25
|
#include <cmath>
|
|
26
26
|
#include <sstream>
|
|
27
|
+
#include <stdexcept>
|
|
27
28
|
|
|
28
29
|
namespace datasketches {
|
|
29
30
|
|
|
@@ -129,6 +130,12 @@ var_opt_union<T,S,A>& var_opt_union<T,S,A>::operator=(var_opt_union&& other) {
|
|
|
129
130
|
|
|
130
131
|
template<typename T, typename S, typename A>
|
|
131
132
|
var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const A& allocator) {
|
|
133
|
+
return deserialize(is, S(), allocator);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
template<typename T, typename S, typename A>
|
|
137
|
+
template<typename SerDe>
|
|
138
|
+
var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
|
|
132
139
|
const auto preamble_longs = read<uint8_t>(is);
|
|
133
140
|
const auto serial_version = read<uint8_t>(is);
|
|
134
141
|
const auto family_id = read<uint8_t>(is);
|
|
@@ -155,7 +162,7 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const A
|
|
|
155
162
|
const auto outer_tau_numer = read<double>(is);
|
|
156
163
|
const auto outer_tau_denom = read<uint64_t>(is);
|
|
157
164
|
|
|
158
|
-
var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(is, allocator);
|
|
165
|
+
var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(is, sd, allocator);
|
|
159
166
|
|
|
160
167
|
if (!is.good())
|
|
161
168
|
throw std::runtime_error("error reading from std::istream");
|
|
@@ -165,6 +172,12 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const A
|
|
|
165
172
|
|
|
166
173
|
template<typename T, typename S, typename A>
|
|
167
174
|
var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
|
175
|
+
return deserialize(bytes, size, S(), allocator);
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
template<typename T, typename S, typename A>
|
|
179
|
+
template<typename SerDe>
|
|
180
|
+
var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
|
|
168
181
|
ensure_minimum_memory(size, 8);
|
|
169
182
|
const char* ptr = static_cast<const char*>(bytes);
|
|
170
183
|
uint8_t preamble_longs;
|
|
@@ -199,22 +212,24 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t
|
|
|
199
212
|
ptr += copy_from_mem(ptr, outer_tau_denom);
|
|
200
213
|
|
|
201
214
|
const size_t gadget_size = size - (PREAMBLE_LONGS_NON_EMPTY << 3);
|
|
202
|
-
var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(ptr, gadget_size, allocator);
|
|
215
|
+
var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(ptr, gadget_size, sd, allocator);
|
|
203
216
|
|
|
204
217
|
return var_opt_union<T,S,A>(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
|
|
205
218
|
}
|
|
206
219
|
|
|
207
220
|
template<typename T, typename S, typename A>
|
|
208
|
-
|
|
221
|
+
template<typename SerDe>
|
|
222
|
+
size_t var_opt_union<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
|
|
209
223
|
if (n_ == 0) {
|
|
210
224
|
return PREAMBLE_LONGS_EMPTY << 3;
|
|
211
225
|
} else {
|
|
212
|
-
return (PREAMBLE_LONGS_NON_EMPTY << 3) + gadget_.get_serialized_size_bytes();
|
|
226
|
+
return (PREAMBLE_LONGS_NON_EMPTY << 3) + gadget_.get_serialized_size_bytes(sd);
|
|
213
227
|
}
|
|
214
228
|
}
|
|
215
229
|
|
|
216
230
|
template<typename T, typename S, typename A>
|
|
217
|
-
|
|
231
|
+
template<typename SerDe>
|
|
232
|
+
void var_opt_union<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
218
233
|
bool empty = (n_ == 0);
|
|
219
234
|
|
|
220
235
|
const uint8_t serialization_version(SER_VER);
|
|
@@ -240,13 +255,14 @@ void var_opt_union<T,S,A>::serialize(std::ostream& os) const {
|
|
|
240
255
|
write(os, n_);
|
|
241
256
|
write(os, outer_tau_numer_);
|
|
242
257
|
write(os, outer_tau_denom_);
|
|
243
|
-
gadget_.serialize(os);
|
|
258
|
+
gadget_.serialize(os, sd);
|
|
244
259
|
}
|
|
245
260
|
}
|
|
246
261
|
|
|
247
262
|
template<typename T, typename S, typename A>
|
|
248
|
-
|
|
249
|
-
|
|
263
|
+
template<typename SerDe>
|
|
264
|
+
std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
|
|
265
|
+
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
|
|
250
266
|
std::vector<uint8_t, AllocU8<A>> bytes(size, 0, gadget_.allocator_);
|
|
251
267
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
252
268
|
|
|
@@ -278,7 +294,7 @@ std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header
|
|
|
278
294
|
ptr += copy_to_mem(outer_tau_numer_, ptr);
|
|
279
295
|
ptr += copy_to_mem(outer_tau_denom_, ptr);
|
|
280
296
|
|
|
281
|
-
auto gadget_bytes = gadget_.serialize();
|
|
297
|
+
auto gadget_bytes = gadget_.serialize(0, sd);
|
|
282
298
|
ptr += copy_to_mem(gadget_bytes.data(), ptr, gadget_bytes.size() * sizeof(uint8_t));
|
|
283
299
|
}
|
|
284
300
|
|
|
@@ -295,14 +311,16 @@ void var_opt_union<T,S,A>::reset() {
|
|
|
295
311
|
|
|
296
312
|
template<typename T, typename S, typename A>
|
|
297
313
|
string<A> var_opt_union<T,S,A>::to_string() const {
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
314
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
315
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
316
|
+
std::ostringstream os;
|
|
317
|
+
os << "### VarOpt Union SUMMARY:" << std::endl;
|
|
318
|
+
os << " n : " << n_ << std::endl;
|
|
301
319
|
os << " Max k : " << max_k_ << std::endl;
|
|
302
|
-
os << " Gadget Summary:
|
|
320
|
+
os << " Gadget Summary:" << std::endl;
|
|
303
321
|
os << gadget_.to_string();
|
|
304
|
-
os << "### END VarOpt Union SUMMARY
|
|
305
|
-
return os.str();
|
|
322
|
+
os << "### END VarOpt Union SUMMARY" << std::endl;
|
|
323
|
+
return string<A>(os.str().c_str(), gadget_.allocator_);
|
|
306
324
|
}
|
|
307
325
|
|
|
308
326
|
template<typename T, typename S, typename A>
|
|
@@ -39,7 +39,7 @@ TEST_CASE("varopt allocation test", "[var_opt_sketch]") {
|
|
|
39
39
|
var_opt_test_sketch sk1(10, var_opt_test_sketch::DEFAULT_RESIZE_FACTOR, 0);
|
|
40
40
|
for (int i = 0; i < 100; ++i) sk1.update(i);
|
|
41
41
|
auto bytes1 = sk1.serialize();
|
|
42
|
-
auto sk2 = var_opt_test_sketch::deserialize(bytes1.data(), bytes1.size(), 0);
|
|
42
|
+
auto sk2 = var_opt_test_sketch::deserialize(bytes1.data(), bytes1.size(), test_type_serde(), 0);
|
|
43
43
|
|
|
44
44
|
std::stringstream ss;
|
|
45
45
|
sk1.serialize(ss);
|
|
@@ -51,7 +51,7 @@ TEST_CASE("varopt allocation test", "[var_opt_sketch]") {
|
|
|
51
51
|
u1.update(sk3);
|
|
52
52
|
|
|
53
53
|
auto bytes2 = u1.serialize();
|
|
54
|
-
auto u2 = var_opt_test_union::deserialize(bytes2.data(), bytes2.size(), 0);
|
|
54
|
+
auto u2 = var_opt_test_union::deserialize(bytes2.data(), bytes2.size(), test_type_serde(), 0);
|
|
55
55
|
}
|
|
56
56
|
REQUIRE(test_allocator_total_bytes == 0);
|
|
57
57
|
REQUIRE(test_allocator_net_allocations == 0);
|
|
@@ -49,8 +49,9 @@ class CMakeBuild(build_ext):
|
|
|
49
49
|
os.path.dirname(self.get_ext_fullpath(ext.name)))
|
|
50
50
|
cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir]
|
|
51
51
|
cmake_args += ['-DWITH_PYTHON=True']
|
|
52
|
+
cmake_args += ['-DCMAKE_CXX_STANDARD=11']
|
|
52
53
|
# ensure we use a consistent python version
|
|
53
|
-
cmake_args += ['-
|
|
54
|
+
cmake_args += ['-DPython3_EXECUTABLE=' + sys.executable]
|
|
54
55
|
cfg = 'Debug' if self.debug else 'Release'
|
|
55
56
|
build_args = ['--config', cfg]
|
|
56
57
|
|
|
@@ -59,7 +60,8 @@ class CMakeBuild(build_ext):
|
|
|
59
60
|
cfg.upper(),
|
|
60
61
|
extdir)]
|
|
61
62
|
if sys.maxsize > 2**32:
|
|
62
|
-
cmake_args += ['-
|
|
63
|
+
cmake_args += ['-T', 'host=x64']
|
|
64
|
+
cmake_args += ['-DCMAKE_GENERATOR_PLATFORM=x64']
|
|
63
65
|
build_args += ['--', '/m']
|
|
64
66
|
else:
|
|
65
67
|
cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
|
|
@@ -74,23 +76,24 @@ class CMakeBuild(build_ext):
|
|
|
74
76
|
subprocess.check_call(['cmake', ext.sourcedir] + cmake_args,
|
|
75
77
|
cwd=self.build_temp, env=env)
|
|
76
78
|
subprocess.check_call(['cmake', '--build', '.', '--target', 'python'] + build_args,
|
|
77
|
-
cwd=self.build_temp)
|
|
79
|
+
cwd=self.build_temp, env=env)
|
|
78
80
|
print() # add an empty line to pretty print
|
|
79
81
|
|
|
80
82
|
setup(
|
|
81
83
|
name='datasketches',
|
|
82
|
-
version='3.
|
|
83
|
-
author='Apache
|
|
84
|
+
version='3.4.0',
|
|
85
|
+
author='Apache Software Foundation',
|
|
84
86
|
author_email='dev@datasketches.apache.org',
|
|
85
|
-
description='
|
|
87
|
+
description='The Apache DataSketches Library for Python',
|
|
86
88
|
license='Apache License 2.0',
|
|
87
89
|
url='http://datasketches.apache.org',
|
|
88
90
|
long_description=open('python/README.md').read(),
|
|
91
|
+
long_description_content_type='text/markdown',
|
|
89
92
|
packages=find_packages('python'), # python packages only in this dir
|
|
90
93
|
package_dir={'':'python'},
|
|
91
94
|
# may need to add all source paths for sdist packages w/o MANIFEST.in
|
|
92
95
|
ext_modules=[CMakeExtension('datasketches')],
|
|
93
96
|
cmdclass={'build_ext': CMakeBuild},
|
|
94
|
-
|
|
97
|
+
install_requires=['numpy'],
|
|
95
98
|
zip_safe=False
|
|
96
99
|
)
|