datasketches 0.2.2 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +8 -8
- data/ext/datasketches/kll_wrapper.cpp +5 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
- data/vendor/datasketches-cpp/python/README.md +57 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
- data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
- data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
- metadata +34 -12
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -23,8 +23,9 @@
|
|
|
23
23
|
#include <functional>
|
|
24
24
|
#include <memory>
|
|
25
25
|
#include <vector>
|
|
26
|
+
#include <cmath>
|
|
26
27
|
|
|
27
|
-
#include "
|
|
28
|
+
#include "quantile_sketch_sorted_view.hpp"
|
|
28
29
|
#include "common_defs.hpp"
|
|
29
30
|
#include "serde.hpp"
|
|
30
31
|
|
|
@@ -35,7 +36,7 @@ namespace datasketches {
|
|
|
35
36
|
* and nearly optimal accuracy per retained item.
|
|
36
37
|
* See <a href="https://arxiv.org/abs/1603.05346v2">Optimal Quantile Approximation in Streams</a>.
|
|
37
38
|
*
|
|
38
|
-
* <p>This is a stochastic streaming sketch that enables near-
|
|
39
|
+
* <p>This is a stochastic streaming sketch that enables near real-time analysis of the
|
|
39
40
|
* approximate distribution of values from a very large stream in a single pass, requiring only
|
|
40
41
|
* that the values are comparable.
|
|
41
42
|
* The analysis is obtained using <i>get_quantile()</i> or <i>get_quantiles()</i> functions or the
|
|
@@ -153,18 +154,28 @@ template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
|
|
|
153
154
|
template<typename A> using AllocD = typename std::allocator_traits<A>::template rebind_alloc<double>;
|
|
154
155
|
template<typename A> using vector_d = std::vector<double, AllocD<A>>;
|
|
155
156
|
|
|
156
|
-
|
|
157
|
+
namespace kll_constants {
|
|
158
|
+
const uint16_t DEFAULT_K = 200;
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
template <
|
|
162
|
+
typename T,
|
|
163
|
+
typename C = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
|
|
164
|
+
typename S = serde<T>, // deprecated, to be removed in the next major version
|
|
165
|
+
typename A = std::allocator<T>
|
|
166
|
+
>
|
|
157
167
|
class kll_sketch {
|
|
158
168
|
public:
|
|
159
169
|
using value_type = T;
|
|
160
170
|
using comparator = C;
|
|
161
171
|
|
|
162
172
|
static const uint8_t DEFAULT_M = 8;
|
|
163
|
-
|
|
173
|
+
// TODO: Redundant and deprecated. Will be removed in the next major version.
|
|
174
|
+
static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
|
|
164
175
|
static const uint16_t MIN_K = DEFAULT_M;
|
|
165
176
|
static const uint16_t MAX_K = (1 << 16) - 1;
|
|
166
177
|
|
|
167
|
-
explicit kll_sketch(uint16_t k = DEFAULT_K, const A& allocator = A());
|
|
178
|
+
explicit kll_sketch(uint16_t k = kll_constants::DEFAULT_K, const A& allocator = A());
|
|
168
179
|
kll_sketch(const kll_sketch& other);
|
|
169
180
|
kll_sketch(kll_sketch&& other) noexcept;
|
|
170
181
|
~kll_sketch();
|
|
@@ -173,31 +184,17 @@ class kll_sketch {
|
|
|
173
184
|
|
|
174
185
|
/**
|
|
175
186
|
* Updates this sketch with the given data item.
|
|
176
|
-
* This method takes lvalue.
|
|
177
|
-
* @param value an item from a stream of items
|
|
178
|
-
*/
|
|
179
|
-
void update(const T& value);
|
|
180
|
-
|
|
181
|
-
/**
|
|
182
|
-
* Updates this sketch with the given data item.
|
|
183
|
-
* This method takes rvalue.
|
|
184
187
|
* @param value an item from a stream of items
|
|
185
188
|
*/
|
|
186
|
-
|
|
189
|
+
template<typename FwdT>
|
|
190
|
+
void update(FwdT&& value);
|
|
187
191
|
|
|
188
192
|
/**
|
|
189
193
|
* Merges another sketch into this one.
|
|
190
|
-
* This method takes lvalue.
|
|
191
194
|
* @param other sketch to merge into this one
|
|
192
195
|
*/
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
/**
|
|
196
|
-
* Merges another sketch into this one.
|
|
197
|
-
* This method takes rvalue.
|
|
198
|
-
* @param other sketch to merge into this one
|
|
199
|
-
*/
|
|
200
|
-
void merge(kll_sketch&& other);
|
|
196
|
+
template<typename FwdSk>
|
|
197
|
+
void merge(FwdSk&& other);
|
|
201
198
|
|
|
202
199
|
/**
|
|
203
200
|
* Returns true if this sketch is empty.
|
|
@@ -245,6 +242,12 @@ class kll_sketch {
|
|
|
245
242
|
*/
|
|
246
243
|
T get_max_value() const;
|
|
247
244
|
|
|
245
|
+
/**
|
|
246
|
+
* Returns an instance of the comparator for this sketch.
|
|
247
|
+
* @return comparator
|
|
248
|
+
*/
|
|
249
|
+
C get_comparator() const;
|
|
250
|
+
|
|
248
251
|
/**
|
|
249
252
|
* Returns an approximation to the value of the data item
|
|
250
253
|
* that would be preceded by the given fraction of a hypothetical sorted
|
|
@@ -261,10 +264,13 @@ class kll_sketch {
|
|
|
261
264
|
* These are also called normalized ranks or fractional ranks.
|
|
262
265
|
* If fraction = 0.0, the true minimum value of the stream is returned.
|
|
263
266
|
* If fraction = 1.0, the true maximum value of the stream is returned.
|
|
267
|
+
* If the parameter inclusive=true, the given rank is considered inclusive (includes the weight of an item)
|
|
264
268
|
*
|
|
265
269
|
* @return the approximation to the value at the given fraction
|
|
266
270
|
*/
|
|
267
|
-
T
|
|
271
|
+
using quantile_return_type = typename quantile_sketch_sorted_view<T, C, A>::quantile_return_type;
|
|
272
|
+
template<bool inclusive = false>
|
|
273
|
+
quantile_return_type get_quantile(double fraction) const;
|
|
268
274
|
|
|
269
275
|
/**
|
|
270
276
|
* This is a more efficient multiple-query version of get_quantile().
|
|
@@ -280,10 +286,12 @@ class kll_sketch {
|
|
|
280
286
|
* @param fractions given array of fractional positions in the hypothetical sorted stream.
|
|
281
287
|
* These are also called normalized ranks or fractional ranks.
|
|
282
288
|
* These fractions must be in the interval [0.0, 1.0], inclusive.
|
|
289
|
+
* If the parameter inclusive=true, the given fractions are considered inclusive (include weights of items)
|
|
283
290
|
*
|
|
284
291
|
* @return array of approximations to the given fractions in the same order as given fractions
|
|
285
292
|
* in the input array.
|
|
286
293
|
*/
|
|
294
|
+
template<bool inclusive = false>
|
|
287
295
|
std::vector<T, A> get_quantiles(const double* fractions, uint32_t size) const;
|
|
288
296
|
|
|
289
297
|
/**
|
|
@@ -299,11 +307,15 @@ class kll_sketch {
|
|
|
299
307
|
*
|
|
300
308
|
* @return array of approximations to the given number of evenly-spaced fractional ranks.
|
|
301
309
|
*/
|
|
310
|
+
template<bool inclusive = false>
|
|
302
311
|
std::vector<T, A> get_quantiles(uint32_t num) const;
|
|
303
312
|
|
|
304
313
|
/**
|
|
305
314
|
* Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1,
|
|
306
315
|
* inclusive.
|
|
316
|
+
* With the template parameter inclusive=true the weight of the given value is included into the rank.
|
|
317
|
+
* Otherwise the rank equals the sum of the weights of all values that are less than the given value
|
|
318
|
+
* according to the comparator C.
|
|
307
319
|
*
|
|
308
320
|
* <p>The resulting approximation has a probabilistic guarantee that can be obtained from the
|
|
309
321
|
* get_normalized_rank_error(false) function.
|
|
@@ -313,6 +325,7 @@ class kll_sketch {
|
|
|
313
325
|
* @param value to be ranked
|
|
314
326
|
* @return an approximate rank of the given value
|
|
315
327
|
*/
|
|
328
|
+
template<bool inclusive = false>
|
|
316
329
|
double get_rank(const T& value) const;
|
|
317
330
|
|
|
318
331
|
/**
|
|
@@ -333,9 +346,12 @@ class kll_sketch {
|
|
|
333
346
|
*
|
|
334
347
|
* @return an array of m+1 doubles each of which is an approximation
|
|
335
348
|
* to the fraction of the input stream values (the mass) that fall into one of those intervals.
|
|
336
|
-
*
|
|
337
|
-
* split point, with the exception that the last interval will include maximum value.
|
|
349
|
+
* If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
|
|
350
|
+
* split point, with the exception that the last interval will include the maximum value.
|
|
351
|
+
* If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
|
|
352
|
+
* split point.
|
|
338
353
|
*/
|
|
354
|
+
template<bool inclusive = false>
|
|
339
355
|
vector_d<A> get_PMF(const T* split_points, uint32_t size) const;
|
|
340
356
|
|
|
341
357
|
/**
|
|
@@ -359,6 +375,7 @@ class kll_sketch {
|
|
|
359
375
|
* CDF array is the sum of the returned values in positions 0 through j of the returned PMF
|
|
360
376
|
* array.
|
|
361
377
|
*/
|
|
378
|
+
template<bool inclusive = false>
|
|
362
379
|
vector_d<A> get_CDF(const T* split_points, uint32_t size) const;
|
|
363
380
|
|
|
364
381
|
/**
|
|
@@ -373,18 +390,20 @@ class kll_sketch {
|
|
|
373
390
|
/**
|
|
374
391
|
* Computes size needed to serialize the current state of the sketch.
|
|
375
392
|
* This version is for fixed-size arithmetic types (integral and floating point).
|
|
393
|
+
* @param sd instance of a SerDe
|
|
376
394
|
* @return size in bytes needed to serialize this sketch
|
|
377
395
|
*/
|
|
378
|
-
template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
|
379
|
-
size_t get_serialized_size_bytes() const;
|
|
396
|
+
template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
|
397
|
+
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
380
398
|
|
|
381
399
|
/**
|
|
382
400
|
* Computes size needed to serialize the current state of the sketch.
|
|
383
401
|
* This version is for all other types and can be expensive since every item needs to be looked at.
|
|
402
|
+
* @param sd instance of a SerDe
|
|
384
403
|
* @return size in bytes needed to serialize this sketch
|
|
385
404
|
*/
|
|
386
|
-
template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
|
387
|
-
size_t get_serialized_size_bytes() const;
|
|
405
|
+
template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
|
406
|
+
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
388
407
|
|
|
389
408
|
/**
|
|
390
409
|
* Returns upper bound on the serialized size of a sketch given a parameter <em>k</em> and stream
|
|
@@ -416,8 +435,10 @@ class kll_sketch {
|
|
|
416
435
|
/**
|
|
417
436
|
* This method serializes the sketch into a given stream in a binary form
|
|
418
437
|
* @param os output stream
|
|
438
|
+
* @param sd instance of a SerDe
|
|
419
439
|
*/
|
|
420
|
-
|
|
440
|
+
template<typename SerDe = S>
|
|
441
|
+
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
|
421
442
|
|
|
422
443
|
// This is a convenience alias for users
|
|
423
444
|
// The type returned by the following serialize method
|
|
@@ -429,23 +450,53 @@ class kll_sketch {
|
|
|
429
450
|
* It is a blank space of a given size.
|
|
430
451
|
* This header is used in Datasketches PostgreSQL extension.
|
|
431
452
|
* @param header_size_bytes space to reserve in front of the sketch
|
|
453
|
+
* @param sd instance of a SerDe
|
|
454
|
+
* @return serialized sketch as a vector of bytes
|
|
432
455
|
*/
|
|
433
|
-
|
|
456
|
+
template<typename SerDe = S>
|
|
457
|
+
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
|
434
458
|
|
|
435
459
|
/**
|
|
436
460
|
* This method deserializes a sketch from a given stream.
|
|
437
461
|
* @param is input stream
|
|
462
|
+
* @param allocator instance of an Allocator
|
|
438
463
|
* @return an instance of a sketch
|
|
464
|
+
*
|
|
465
|
+
* Deprecated, to be removed in the next major version
|
|
439
466
|
*/
|
|
440
|
-
static kll_sketch
|
|
467
|
+
static kll_sketch deserialize(std::istream& is, const A& allocator = A());
|
|
468
|
+
|
|
469
|
+
/**
|
|
470
|
+
* This method deserializes a sketch from a given stream.
|
|
471
|
+
* @param is input stream
|
|
472
|
+
* @param sd instance of a SerDe
|
|
473
|
+
* @param allocator instance of an Allocator
|
|
474
|
+
* @return an instance of a sketch
|
|
475
|
+
*/
|
|
476
|
+
template<typename SerDe = S>
|
|
477
|
+
static kll_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
|
|
478
|
+
|
|
479
|
+
/**
|
|
480
|
+
* This method deserializes a sketch from a given array of bytes.
|
|
481
|
+
* @param bytes pointer to the array of bytes
|
|
482
|
+
* @param size the size of the array
|
|
483
|
+
* @param allocator instance of an Allocator
|
|
484
|
+
* @return an instance of a sketch
|
|
485
|
+
*
|
|
486
|
+
* Deprecated, to be removed in the next major version
|
|
487
|
+
*/
|
|
488
|
+
static kll_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
|
|
441
489
|
|
|
442
490
|
/**
|
|
443
491
|
* This method deserializes a sketch from a given array of bytes.
|
|
444
492
|
* @param bytes pointer to the array of bytes
|
|
445
493
|
* @param size the size of the array
|
|
494
|
+
* @param sd instance of a SerDe
|
|
495
|
+
* @param allocator instance of an Allocator
|
|
446
496
|
* @return an instance of a sketch
|
|
447
497
|
*/
|
|
448
|
-
|
|
498
|
+
template<typename SerDe = S>
|
|
499
|
+
static kll_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
|
|
449
500
|
|
|
450
501
|
/*
|
|
451
502
|
* Gets the normalized rank error given k and pmf.
|
|
@@ -467,6 +518,9 @@ class kll_sketch {
|
|
|
467
518
|
const_iterator begin() const;
|
|
468
519
|
const_iterator end() const;
|
|
469
520
|
|
|
521
|
+
template<bool inclusive = false>
|
|
522
|
+
quantile_sketch_sorted_view<T, C, A> get_sorted_view(bool cumulative) const;
|
|
523
|
+
|
|
470
524
|
#ifdef KLL_VALIDATION
|
|
471
525
|
uint8_t get_num_levels() { return num_levels_; }
|
|
472
526
|
uint32_t* get_levels() { return levels_; }
|
|
@@ -475,7 +529,7 @@ class kll_sketch {
|
|
|
475
529
|
|
|
476
530
|
private:
|
|
477
531
|
/* Serialized sketch layout:
|
|
478
|
-
*
|
|
532
|
+
* Addr:
|
|
479
533
|
* || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
|
|
480
534
|
* 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts |
|
|
481
535
|
* || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
|
|
@@ -510,8 +564,6 @@ class kll_sketch {
|
|
|
510
564
|
T* max_value_;
|
|
511
565
|
bool is_level_zero_sorted_;
|
|
512
566
|
|
|
513
|
-
friend class kll_quantile_calculator<T, C, A>;
|
|
514
|
-
|
|
515
567
|
// for deserialization
|
|
516
568
|
class item_deleter;
|
|
517
569
|
class items_deleter;
|
|
@@ -530,15 +582,21 @@ class kll_sketch {
|
|
|
530
582
|
uint8_t find_level_to_compact() const;
|
|
531
583
|
void add_empty_top_level_to_completely_full_sketch();
|
|
532
584
|
void sort_level_zero();
|
|
533
|
-
|
|
585
|
+
|
|
586
|
+
template<bool inclusive>
|
|
534
587
|
vector_d<A> get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const;
|
|
588
|
+
template<bool inclusive>
|
|
535
589
|
void increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
|
536
590
|
const T* split_points, uint32_t size, double* buckets) const;
|
|
591
|
+
template<bool inclusive>
|
|
537
592
|
void increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
|
538
593
|
const T* split_points, uint32_t size, double* buckets) const;
|
|
594
|
+
|
|
539
595
|
template<typename O> void merge_higher_levels(O&& other, uint64_t final_n);
|
|
540
|
-
|
|
541
|
-
|
|
596
|
+
|
|
597
|
+
template<typename FwdSk>
|
|
598
|
+
void populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels);
|
|
599
|
+
|
|
542
600
|
void assert_correct_total_weight() const;
|
|
543
601
|
uint32_t safe_level_size(uint8_t level) const;
|
|
544
602
|
uint32_t get_num_retained_above_level_zero() const;
|
|
@@ -550,8 +608,9 @@ class kll_sketch {
|
|
|
550
608
|
|
|
551
609
|
// implementations for floating point types
|
|
552
610
|
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
|
553
|
-
static TT get_invalid_value() {
|
|
554
|
-
|
|
611
|
+
static const TT& get_invalid_value() {
|
|
612
|
+
static TT value = std::numeric_limits<TT>::quiet_NaN();
|
|
613
|
+
return value;
|
|
555
614
|
}
|
|
556
615
|
|
|
557
616
|
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
|
@@ -561,8 +620,8 @@ class kll_sketch {
|
|
|
561
620
|
|
|
562
621
|
// implementations for all other types
|
|
563
622
|
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|
|
564
|
-
static TT get_invalid_value() {
|
|
565
|
-
throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of
|
|
623
|
+
static const TT& get_invalid_value() {
|
|
624
|
+
throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of value");
|
|
566
625
|
}
|
|
567
626
|
|
|
568
627
|
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|