datasketches 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +1 -1
- data/ext/datasketches/kll_wrapper.cpp +5 -1
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +4 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +14 -0
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +25 -9
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +96 -42
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +105 -127
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +94 -25
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +1 -1
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/README.md +7 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +48 -13
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +1 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +10 -4
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
- data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
- data/vendor/datasketches-cpp/req/CMakeLists.txt +0 -2
- data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +62 -59
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +44 -7
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +31 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +25 -9
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +8 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +7 -45
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +29 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +16 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -0
- metadata +25 -9
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
|
@@ -23,8 +23,9 @@
|
|
|
23
23
|
#include <functional>
|
|
24
24
|
#include <memory>
|
|
25
25
|
#include <vector>
|
|
26
|
+
#include <cmath>
|
|
26
27
|
|
|
27
|
-
#include "kll_quantile_calculator.hpp"
|
|
28
|
+
#include "quantile_sketch_sorted_view.hpp"
|
|
28
29
|
#include "common_defs.hpp"
|
|
29
30
|
#include "serde.hpp"
|
|
30
31
|
|
|
@@ -35,7 +36,7 @@ namespace datasketches {
|
|
|
35
36
|
* and nearly optimal accuracy per retained item.
|
|
36
37
|
* See <a href="https://arxiv.org/abs/1603.05346v2">Optimal Quantile Approximation in Streams</a>.
|
|
37
38
|
*
|
|
38
|
-
* <p>This is a stochastic streaming sketch that enables near-real time analysis of the
|
|
39
|
+
* <p>This is a stochastic streaming sketch that enables near real-time analysis of the
|
|
39
40
|
* approximate distribution of values from a very large stream in a single pass, requiring only
|
|
40
41
|
* that the values are comparable.
|
|
41
42
|
* The analysis is obtained using <i>get_quantile()</i> or <i>get_quantiles()</i> functions or the
|
|
@@ -157,7 +158,12 @@ namespace kll_constants {
|
|
|
157
158
|
const uint16_t DEFAULT_K = 200;
|
|
158
159
|
}
|
|
159
160
|
|
|
160
|
-
template <typename T, typename C = std::less<T>, typename S = serde<T>, typename A = std::allocator<T>>
|
|
161
|
+
template <
|
|
162
|
+
typename T,
|
|
163
|
+
typename C = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
|
|
164
|
+
typename S = serde<T>, // deprecated, to be removed in the next major version
|
|
165
|
+
typename A = std::allocator<T>
|
|
166
|
+
>
|
|
161
167
|
class kll_sketch {
|
|
162
168
|
public:
|
|
163
169
|
using value_type = T;
|
|
@@ -178,31 +184,17 @@ class kll_sketch {
|
|
|
178
184
|
|
|
179
185
|
/**
|
|
180
186
|
* Updates this sketch with the given data item.
|
|
181
|
-
* This method takes lvalue.
|
|
182
187
|
* @param value an item from a stream of items
|
|
183
188
|
*/
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
/**
|
|
187
|
-
* Updates this sketch with the given data item.
|
|
188
|
-
* This method takes rvalue.
|
|
189
|
-
* @param value an item from a stream of items
|
|
190
|
-
*/
|
|
191
|
-
void update(T&& value);
|
|
192
|
-
|
|
193
|
-
/**
|
|
194
|
-
* Merges another sketch into this one.
|
|
195
|
-
* This method takes lvalue.
|
|
196
|
-
* @param other sketch to merge into this one
|
|
197
|
-
*/
|
|
198
|
-
void merge(const kll_sketch& other);
|
|
189
|
+
template<typename FwdT>
|
|
190
|
+
void update(FwdT&& value);
|
|
199
191
|
|
|
200
192
|
/**
|
|
201
193
|
* Merges another sketch into this one.
|
|
202
|
-
* This method takes rvalue.
|
|
203
194
|
* @param other sketch to merge into this one
|
|
204
195
|
*/
|
|
205
|
-
|
|
196
|
+
template<typename FwdSk>
|
|
197
|
+
void merge(FwdSk&& other);
|
|
206
198
|
|
|
207
199
|
/**
|
|
208
200
|
* Returns true if this sketch is empty.
|
|
@@ -250,6 +242,12 @@ class kll_sketch {
|
|
|
250
242
|
*/
|
|
251
243
|
T get_max_value() const;
|
|
252
244
|
|
|
245
|
+
/**
|
|
246
|
+
* Returns an instance of the comparator for this sketch.
|
|
247
|
+
* @return comparator
|
|
248
|
+
*/
|
|
249
|
+
C get_comparator() const;
|
|
250
|
+
|
|
253
251
|
/**
|
|
254
252
|
* Returns an approximation to the value of the data item
|
|
255
253
|
* that would be preceded by the given fraction of a hypothetical sorted
|
|
@@ -266,10 +264,13 @@ class kll_sketch {
|
|
|
266
264
|
* These are also called normalized ranks or fractional ranks.
|
|
267
265
|
* If fraction = 0.0, the true minimum value of the stream is returned.
|
|
268
266
|
* If fraction = 1.0, the true maximum value of the stream is returned.
|
|
267
|
+
* If the parameter inclusive=true, the given rank is considered inclusive (includes the weight of an item)
|
|
269
268
|
*
|
|
270
269
|
* @return the approximation to the value at the given fraction
|
|
271
270
|
*/
|
|
272
|
-
T get_quantile(double fraction) const;
|
|
271
|
+
using quantile_return_type = typename quantile_sketch_sorted_view<T, C, A>::quantile_return_type;
|
|
272
|
+
template<bool inclusive = false>
|
|
273
|
+
quantile_return_type get_quantile(double fraction) const;
|
|
273
274
|
|
|
274
275
|
/**
|
|
275
276
|
* This is a more efficient multiple-query version of get_quantile().
|
|
@@ -285,10 +286,12 @@ class kll_sketch {
|
|
|
285
286
|
* @param fractions given array of fractional positions in the hypothetical sorted stream.
|
|
286
287
|
* These are also called normalized ranks or fractional ranks.
|
|
287
288
|
* These fractions must be in the interval [0.0, 1.0], inclusive.
|
|
289
|
+
* If the parameter inclusive=true, the given fractions are considered inclusive (include weights of items)
|
|
288
290
|
*
|
|
289
291
|
* @return array of approximations to the given fractions in the same order as given fractions
|
|
290
292
|
* in the input array.
|
|
291
293
|
*/
|
|
294
|
+
template<bool inclusive = false>
|
|
292
295
|
std::vector<T, A> get_quantiles(const double* fractions, uint32_t size) const;
|
|
293
296
|
|
|
294
297
|
/**
|
|
@@ -304,11 +307,15 @@ class kll_sketch {
|
|
|
304
307
|
*
|
|
305
308
|
* @return array of approximations to the given number of evenly-spaced fractional ranks.
|
|
306
309
|
*/
|
|
310
|
+
template<bool inclusive = false>
|
|
307
311
|
std::vector<T, A> get_quantiles(uint32_t num) const;
|
|
308
312
|
|
|
309
313
|
/**
|
|
310
314
|
* Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1,
|
|
311
315
|
* inclusive.
|
|
316
|
+
* With the template parameter inclusive=true the weight of the given value is included into the rank.
|
|
317
|
+
* Otherwise the rank equals the sum of the weights of all values that are less than the given value
|
|
318
|
+
* according to the comparator C.
|
|
312
319
|
*
|
|
313
320
|
* <p>The resulting approximation has a probabilistic guarantee that can be obtained from the
|
|
314
321
|
* get_normalized_rank_error(false) function.
|
|
@@ -318,6 +325,7 @@ class kll_sketch {
|
|
|
318
325
|
* @param value to be ranked
|
|
319
326
|
* @return an approximate rank of the given value
|
|
320
327
|
*/
|
|
328
|
+
template<bool inclusive = false>
|
|
321
329
|
double get_rank(const T& value) const;
|
|
322
330
|
|
|
323
331
|
/**
|
|
@@ -338,9 +346,12 @@ class kll_sketch {
|
|
|
338
346
|
*
|
|
339
347
|
* @return an array of m+1 doubles each of which is an approximation
|
|
340
348
|
* to the fraction of the input stream values (the mass) that fall into one of those intervals.
|
|
341
|
-
* The definition of an "interval" is inclusive of the left split point and exclusive of the right
|
|
342
|
-
* split point, with the exception that the last interval will include maximum value.
|
|
349
|
+
* If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
|
|
350
|
+
* split point, with the exception that the last interval will include the maximum value.
|
|
351
|
+
* If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
|
|
352
|
+
* split point.
|
|
343
353
|
*/
|
|
354
|
+
template<bool inclusive = false>
|
|
344
355
|
vector_d<A> get_PMF(const T* split_points, uint32_t size) const;
|
|
345
356
|
|
|
346
357
|
/**
|
|
@@ -364,6 +375,7 @@ class kll_sketch {
|
|
|
364
375
|
* CDF array is the sum of the returned values in positions 0 through j of the returned PMF
|
|
365
376
|
* array.
|
|
366
377
|
*/
|
|
378
|
+
template<bool inclusive = false>
|
|
367
379
|
vector_d<A> get_CDF(const T* split_points, uint32_t size) const;
|
|
368
380
|
|
|
369
381
|
/**
|
|
@@ -378,18 +390,20 @@ class kll_sketch {
|
|
|
378
390
|
/**
|
|
379
391
|
* Computes size needed to serialize the current state of the sketch.
|
|
380
392
|
* This version is for fixed-size arithmetic types (integral and floating point).
|
|
393
|
+
* @param instance of a SerDe
|
|
381
394
|
* @return size in bytes needed to serialize this sketch
|
|
382
395
|
*/
|
|
383
|
-
template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
|
384
|
-
size_t get_serialized_size_bytes() const;
|
|
396
|
+
template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
|
397
|
+
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
385
398
|
|
|
386
399
|
/**
|
|
387
400
|
* Computes size needed to serialize the current state of the sketch.
|
|
388
401
|
* This version is for all other types and can be expensive since every item needs to be looked at.
|
|
402
|
+
* @param instance of a SerDe
|
|
389
403
|
* @return size in bytes needed to serialize this sketch
|
|
390
404
|
*/
|
|
391
|
-
template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
|
392
|
-
size_t get_serialized_size_bytes() const;
|
|
405
|
+
template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
|
406
|
+
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
393
407
|
|
|
394
408
|
/**
|
|
395
409
|
* Returns upper bound on the serialized size of a sketch given a parameter <em>k</em> and stream
|
|
@@ -421,8 +435,10 @@ class kll_sketch {
|
|
|
421
435
|
/**
|
|
422
436
|
* This method serializes the sketch into a given stream in a binary form
|
|
423
437
|
* @param os output stream
|
|
438
|
+
* @param instance of a SerDe
|
|
424
439
|
*/
|
|
425
|
-
|
|
440
|
+
template<typename SerDe = S>
|
|
441
|
+
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
|
426
442
|
|
|
427
443
|
// This is a convenience alias for users
|
|
428
444
|
// The type returned by the following serialize method
|
|
@@ -434,23 +450,53 @@ class kll_sketch {
|
|
|
434
450
|
* It is a blank space of a given size.
|
|
435
451
|
* This header is used in Datasketches PostgreSQL extension.
|
|
436
452
|
* @param header_size_bytes space to reserve in front of the sketch
|
|
453
|
+
* @param instance of a SerDe
|
|
454
|
+
* @return serialized sketch as a vector of bytes
|
|
455
|
+
*/
|
|
456
|
+
template<typename SerDe = S>
|
|
457
|
+
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
|
458
|
+
|
|
459
|
+
/**
|
|
460
|
+
* This method deserializes a sketch from a given stream.
|
|
461
|
+
* @param is input stream
|
|
462
|
+
* @param instance of an Allocator
|
|
463
|
+
* @return an instance of a sketch
|
|
464
|
+
*
|
|
465
|
+
* Deprecated, to be removed in the next major version
|
|
437
466
|
*/
|
|
438
|
-
|
|
467
|
+
static kll_sketch deserialize(std::istream& is, const A& allocator = A());
|
|
439
468
|
|
|
440
469
|
/**
|
|
441
470
|
* This method deserializes a sketch from a given stream.
|
|
442
471
|
* @param is input stream
|
|
472
|
+
* @param instance of a SerDe
|
|
473
|
+
* @param instance of an Allocator
|
|
474
|
+
* @return an instance of a sketch
|
|
475
|
+
*/
|
|
476
|
+
template<typename SerDe = S>
|
|
477
|
+
static kll_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
|
|
478
|
+
|
|
479
|
+
/**
|
|
480
|
+
* This method deserializes a sketch from a given array of bytes.
|
|
481
|
+
* @param bytes pointer to the array of bytes
|
|
482
|
+
* @param size the size of the array
|
|
483
|
+
* @param instance of an Allocator
|
|
443
484
|
* @return an instance of a sketch
|
|
485
|
+
*
|
|
486
|
+
* Deprecated, to be removed in the next major version
|
|
444
487
|
*/
|
|
445
|
-
static kll_sketch
|
|
488
|
+
static kll_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
|
|
446
489
|
|
|
447
490
|
/**
|
|
448
491
|
* This method deserializes a sketch from a given array of bytes.
|
|
449
492
|
* @param bytes pointer to the array of bytes
|
|
450
493
|
* @param size the size of the array
|
|
494
|
+
* @param instance of a SerDe
|
|
495
|
+
* @param instance of an Allocator
|
|
451
496
|
* @return an instance of a sketch
|
|
452
497
|
*/
|
|
453
|
-
|
|
498
|
+
template<typename SerDe = S>
|
|
499
|
+
static kll_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
|
|
454
500
|
|
|
455
501
|
/*
|
|
456
502
|
* Gets the normalized rank error given k and pmf.
|
|
@@ -472,6 +518,9 @@ class kll_sketch {
|
|
|
472
518
|
const_iterator begin() const;
|
|
473
519
|
const_iterator end() const;
|
|
474
520
|
|
|
521
|
+
template<bool inclusive = false>
|
|
522
|
+
quantile_sketch_sorted_view<T, C, A> get_sorted_view(bool cumulative) const;
|
|
523
|
+
|
|
475
524
|
#ifdef KLL_VALIDATION
|
|
476
525
|
uint8_t get_num_levels() { return num_levels_; }
|
|
477
526
|
uint32_t* get_levels() { return levels_; }
|
|
@@ -480,7 +529,7 @@ class kll_sketch {
|
|
|
480
529
|
|
|
481
530
|
private:
|
|
482
531
|
/* Serialized sketch layout:
|
|
483
|
-
*
|
|
532
|
+
* Addr:
|
|
484
533
|
* || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
|
|
485
534
|
* 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts |
|
|
486
535
|
* || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
|
|
@@ -515,8 +564,6 @@ class kll_sketch {
|
|
|
515
564
|
T* max_value_;
|
|
516
565
|
bool is_level_zero_sorted_;
|
|
517
566
|
|
|
518
|
-
friend class kll_quantile_calculator<T, C, A>;
|
|
519
|
-
|
|
520
567
|
// for deserialization
|
|
521
568
|
class item_deleter;
|
|
522
569
|
class items_deleter;
|
|
@@ -535,15 +582,21 @@ class kll_sketch {
|
|
|
535
582
|
uint8_t find_level_to_compact() const;
|
|
536
583
|
void add_empty_top_level_to_completely_full_sketch();
|
|
537
584
|
void sort_level_zero();
|
|
538
|
-
|
|
585
|
+
|
|
586
|
+
template<bool inclusive>
|
|
539
587
|
vector_d<A> get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const;
|
|
588
|
+
template<bool inclusive>
|
|
540
589
|
void increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
|
541
590
|
const T* split_points, uint32_t size, double* buckets) const;
|
|
591
|
+
template<bool inclusive>
|
|
542
592
|
void increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
|
543
593
|
const T* split_points, uint32_t size, double* buckets) const;
|
|
594
|
+
|
|
544
595
|
template<typename O> void merge_higher_levels(O&& other, uint64_t final_n);
|
|
545
|
-
|
|
546
|
-
|
|
596
|
+
|
|
597
|
+
template<typename FwdSk>
|
|
598
|
+
void populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels);
|
|
599
|
+
|
|
547
600
|
void assert_correct_total_weight() const;
|
|
548
601
|
uint32_t safe_level_size(uint8_t level) const;
|
|
549
602
|
uint32_t get_num_retained_above_level_zero() const;
|
|
@@ -555,8 +608,9 @@ class kll_sketch {
|
|
|
555
608
|
|
|
556
609
|
// implementations for floating point types
|
|
557
610
|
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
|
558
|
-
static TT get_invalid_value() {
|
|
559
|
-
|
|
611
|
+
static const TT& get_invalid_value() {
|
|
612
|
+
static TT value = std::numeric_limits<TT>::quiet_NaN();
|
|
613
|
+
return value;
|
|
560
614
|
}
|
|
561
615
|
|
|
562
616
|
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
|
@@ -566,8 +620,8 @@ class kll_sketch {
|
|
|
566
620
|
|
|
567
621
|
// implementations for all other types
|
|
568
622
|
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|
|
569
|
-
static TT get_invalid_value() {
|
|
570
|
-
throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of values");
|
|
623
|
+
static const TT& get_invalid_value() {
|
|
624
|
+
throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of value");
|
|
571
625
|
}
|
|
572
626
|
|
|
573
627
|
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|