datasketches 0.2.2 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +8 -8
  6. data/ext/datasketches/kll_wrapper.cpp +5 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
  16. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
  18. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  19. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  20. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  21. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  22. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  26. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
  31. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  33. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  34. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  35. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  36. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  38. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  41. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  42. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  44. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  45. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  47. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  48. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  49. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  50. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  51. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  52. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  53. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  54. data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
  55. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  56. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  57. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
  58. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
  59. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
  60. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  61. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  62. data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
  63. data/vendor/datasketches-cpp/python/README.md +57 -50
  64. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  65. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  66. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  67. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  68. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
  69. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  70. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  71. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  72. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
  73. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
  74. data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
  75. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  76. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  77. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  78. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  79. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  80. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  81. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  82. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  83. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  84. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  85. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  86. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  87. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  88. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  89. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  90. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  91. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  92. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  93. data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
  94. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  95. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  96. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  97. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
  98. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  99. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  100. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
  101. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  107. data/vendor/datasketches-cpp/setup.py +10 -7
  108. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  110. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
  114. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
  115. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  116. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
  117. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  118. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
  120. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  121. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
  122. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
  123. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  124. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  125. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  126. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  127. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  130. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  131. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
  132. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
  133. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
  134. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  135. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  136. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  137. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  138. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
  141. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  142. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  143. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  144. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  145. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  146. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  147. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
  148. metadata +34 -12
  149. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  150. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  151. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  152. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
  153. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  154. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -23,8 +23,9 @@
23
23
  #include <functional>
24
24
  #include <memory>
25
25
  #include <vector>
26
+ #include <cmath>
26
27
 
27
- #include "kll_quantile_calculator.hpp"
28
+ #include "quantile_sketch_sorted_view.hpp"
28
29
  #include "common_defs.hpp"
29
30
  #include "serde.hpp"
30
31
 
@@ -35,7 +36,7 @@ namespace datasketches {
35
36
  * and nearly optimal accuracy per retained item.
36
37
  * See <a href="https://arxiv.org/abs/1603.05346v2">Optimal Quantile Approximation in Streams</a>.
37
38
  *
38
- * <p>This is a stochastic streaming sketch that enables near-real time analysis of the
39
+ * <p>This is a stochastic streaming sketch that enables near real-time analysis of the
39
40
  * approximate distribution of values from a very large stream in a single pass, requiring only
40
41
  * that the values are comparable.
41
42
  * The analysis is obtained using <i>get_quantile()</i> or <i>get_quantiles()</i> functions or the
@@ -153,18 +154,28 @@ template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
153
154
  template<typename A> using AllocD = typename std::allocator_traits<A>::template rebind_alloc<double>;
154
155
  template<typename A> using vector_d = std::vector<double, AllocD<A>>;
155
156
 
156
- template <typename T, typename C = std::less<T>, typename S = serde<T>, typename A = std::allocator<T>>
157
+ namespace kll_constants {
158
+ const uint16_t DEFAULT_K = 200;
159
+ }
160
+
161
+ template <
162
+ typename T,
163
+ typename C = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
164
+ typename S = serde<T>, // deprecated, to be removed in the next major version
165
+ typename A = std::allocator<T>
166
+ >
157
167
  class kll_sketch {
158
168
  public:
159
169
  using value_type = T;
160
170
  using comparator = C;
161
171
 
162
172
  static const uint8_t DEFAULT_M = 8;
163
- static const uint16_t DEFAULT_K = 200;
173
+ // TODO: Redundant and deprecated. Will be removed in the next major version.
174
+ static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
164
175
  static const uint16_t MIN_K = DEFAULT_M;
165
176
  static const uint16_t MAX_K = (1 << 16) - 1;
166
177
 
167
- explicit kll_sketch(uint16_t k = DEFAULT_K, const A& allocator = A());
178
+ explicit kll_sketch(uint16_t k = kll_constants::DEFAULT_K, const A& allocator = A());
168
179
  kll_sketch(const kll_sketch& other);
169
180
  kll_sketch(kll_sketch&& other) noexcept;
170
181
  ~kll_sketch();
@@ -173,31 +184,17 @@ class kll_sketch {
173
184
 
174
185
  /**
175
186
  * Updates this sketch with the given data item.
176
- * This method takes lvalue.
177
- * @param value an item from a stream of items
178
- */
179
- void update(const T& value);
180
-
181
- /**
182
- * Updates this sketch with the given data item.
183
- * This method takes rvalue.
184
187
  * @param value an item from a stream of items
185
188
  */
186
- void update(T&& value);
189
+ template<typename FwdT>
190
+ void update(FwdT&& value);
187
191
 
188
192
  /**
189
193
  * Merges another sketch into this one.
190
- * This method takes lvalue.
191
194
  * @param other sketch to merge into this one
192
195
  */
193
- void merge(const kll_sketch& other);
194
-
195
- /**
196
- * Merges another sketch into this one.
197
- * This method takes rvalue.
198
- * @param other sketch to merge into this one
199
- */
200
- void merge(kll_sketch&& other);
196
+ template<typename FwdSk>
197
+ void merge(FwdSk&& other);
201
198
 
202
199
  /**
203
200
  * Returns true if this sketch is empty.
@@ -245,6 +242,12 @@ class kll_sketch {
245
242
  */
246
243
  T get_max_value() const;
247
244
 
245
+ /**
246
+ * Returns an instance of the comparator for this sketch.
247
+ * @return comparator
248
+ */
249
+ C get_comparator() const;
250
+
248
251
  /**
249
252
  * Returns an approximation to the value of the data item
250
253
  * that would be preceded by the given fraction of a hypothetical sorted
@@ -261,10 +264,13 @@ class kll_sketch {
261
264
  * These are also called normalized ranks or fractional ranks.
262
265
  * If fraction = 0.0, the true minimum value of the stream is returned.
263
266
  * If fraction = 1.0, the true maximum value of the stream is returned.
267
+ * If the parameter inclusive=true, the given rank is considered inclusive (includes the weight of an item)
264
268
  *
265
269
  * @return the approximation to the value at the given fraction
266
270
  */
267
- T get_quantile(double fraction) const;
271
+ using quantile_return_type = typename quantile_sketch_sorted_view<T, C, A>::quantile_return_type;
272
+ template<bool inclusive = false>
273
+ quantile_return_type get_quantile(double fraction) const;
268
274
 
269
275
  /**
270
276
  * This is a more efficient multiple-query version of get_quantile().
@@ -280,10 +286,12 @@ class kll_sketch {
280
286
  * @param fractions given array of fractional positions in the hypothetical sorted stream.
281
287
  * These are also called normalized ranks or fractional ranks.
282
288
  * These fractions must be in the interval [0.0, 1.0], inclusive.
289
+ * If the parameter inclusive=true, the given fractions are considered inclusive (include weights of items)
283
290
  *
284
291
  * @return array of approximations to the given fractions in the same order as given fractions
285
292
  * in the input array.
286
293
  */
294
+ template<bool inclusive = false>
287
295
  std::vector<T, A> get_quantiles(const double* fractions, uint32_t size) const;
288
296
 
289
297
  /**
@@ -299,11 +307,15 @@ class kll_sketch {
299
307
  *
300
308
  * @return array of approximations to the given number of evenly-spaced fractional ranks.
301
309
  */
310
+ template<bool inclusive = false>
302
311
  std::vector<T, A> get_quantiles(uint32_t num) const;
303
312
 
304
313
  /**
305
314
  * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1,
306
315
  * inclusive.
316
+ * With the template parameter inclusive=true the weight of the given value is included into the rank.
317
+ * Otherwise the rank equals the sum of the weights of all values that are less than the given value
318
+ * according to the comparator C.
307
319
  *
308
320
  * <p>The resulting approximation has a probabilistic guarantee that can be obtained from the
309
321
  * get_normalized_rank_error(false) function.
@@ -313,6 +325,7 @@ class kll_sketch {
313
325
  * @param value to be ranked
314
326
  * @return an approximate rank of the given value
315
327
  */
328
+ template<bool inclusive = false>
316
329
  double get_rank(const T& value) const;
317
330
 
318
331
  /**
@@ -333,9 +346,12 @@ class kll_sketch {
333
346
  *
334
347
  * @return an array of m+1 doubles each of which is an approximation
335
348
  * to the fraction of the input stream values (the mass) that fall into one of those intervals.
336
- * The definition of an "interval" is inclusive of the left split point and exclusive of the right
337
- * split point, with the exception that the last interval will include maximum value.
349
+ * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
350
+ * split point, with the exception that the last interval will include the maximum value.
351
+ * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
352
+ * split point.
338
353
  */
354
+ template<bool inclusive = false>
339
355
  vector_d<A> get_PMF(const T* split_points, uint32_t size) const;
340
356
 
341
357
  /**
@@ -359,6 +375,7 @@ class kll_sketch {
359
375
  * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
360
376
  * array.
361
377
  */
378
+ template<bool inclusive = false>
362
379
  vector_d<A> get_CDF(const T* split_points, uint32_t size) const;
363
380
 
364
381
  /**
@@ -373,18 +390,20 @@ class kll_sketch {
373
390
  /**
374
391
  * Computes size needed to serialize the current state of the sketch.
375
392
  * This version is for fixed-size arithmetic types (integral and floating point).
393
+ * @param sd instance of a SerDe
376
394
  * @return size in bytes needed to serialize this sketch
377
395
  */
378
- template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
379
- size_t get_serialized_size_bytes() const;
396
+ template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
397
+ size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
380
398
 
381
399
  /**
382
400
  * Computes size needed to serialize the current state of the sketch.
383
401
  * This version is for all other types and can be expensive since every item needs to be looked at.
402
+ * @param sd instance of a SerDe
384
403
  * @return size in bytes needed to serialize this sketch
385
404
  */
386
- template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
387
- size_t get_serialized_size_bytes() const;
405
+ template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
406
+ size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
388
407
 
389
408
  /**
390
409
  * Returns upper bound on the serialized size of a sketch given a parameter <em>k</em> and stream
@@ -416,8 +435,10 @@ class kll_sketch {
416
435
  /**
417
436
  * This method serializes the sketch into a given stream in a binary form
418
437
  * @param os output stream
438
+ * @param sd instance of a SerDe
419
439
  */
420
- void serialize(std::ostream& os) const;
440
+ template<typename SerDe = S>
441
+ void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
421
442
 
422
443
  // This is a convenience alias for users
423
444
  // The type returned by the following serialize method
@@ -429,23 +450,53 @@ class kll_sketch {
429
450
  * It is a blank space of a given size.
430
451
  * This header is used in Datasketches PostgreSQL extension.
431
452
  * @param header_size_bytes space to reserve in front of the sketch
453
+ * @param sd instance of a SerDe
454
+ * @return serialized sketch as a vector of bytes
432
455
  */
433
- vector_bytes serialize(unsigned header_size_bytes = 0) const;
456
+ template<typename SerDe = S>
457
+ vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
434
458
 
435
459
  /**
436
460
  * This method deserializes a sketch from a given stream.
437
461
  * @param is input stream
462
+ * @param allocator instance of an Allocator
438
463
  * @return an instance of a sketch
464
+ *
465
+ * Deprecated, to be removed in the next major version
439
466
  */
440
- static kll_sketch<T, C, S, A> deserialize(std::istream& is, const A& allocator = A());
467
+ static kll_sketch deserialize(std::istream& is, const A& allocator = A());
468
+
469
+ /**
470
+ * This method deserializes a sketch from a given stream.
471
+ * @param is input stream
472
+ * @param sd instance of a SerDe
473
+ * @param allocator instance of an Allocator
474
+ * @return an instance of a sketch
475
+ */
476
+ template<typename SerDe = S>
477
+ static kll_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
478
+
479
+ /**
480
+ * This method deserializes a sketch from a given array of bytes.
481
+ * @param bytes pointer to the array of bytes
482
+ * @param size the size of the array
483
+ * @param allocator instance of an Allocator
484
+ * @return an instance of a sketch
485
+ *
486
+ * Deprecated, to be removed in the next major version
487
+ */
488
+ static kll_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
441
489
 
442
490
  /**
443
491
  * This method deserializes a sketch from a given array of bytes.
444
492
  * @param bytes pointer to the array of bytes
445
493
  * @param size the size of the array
494
+ * @param sd instance of a SerDe
495
+ * @param allocator instance of an Allocator
446
496
  * @return an instance of a sketch
447
497
  */
448
- static kll_sketch<T, C, S, A> deserialize(const void* bytes, size_t size, const A& allocator = A());
498
+ template<typename SerDe = S>
499
+ static kll_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
449
500
 
450
501
  /*
451
502
  * Gets the normalized rank error given k and pmf.
@@ -467,6 +518,9 @@ class kll_sketch {
467
518
  const_iterator begin() const;
468
519
  const_iterator end() const;
469
520
 
521
+ template<bool inclusive = false>
522
+ quantile_sketch_sorted_view<T, C, A> get_sorted_view(bool cumulative) const;
523
+
470
524
  #ifdef KLL_VALIDATION
471
525
  uint8_t get_num_levels() { return num_levels_; }
472
526
  uint32_t* get_levels() { return levels_; }
@@ -475,7 +529,7 @@ class kll_sketch {
475
529
 
476
530
  private:
477
531
  /* Serialized sketch layout:
478
- * Adr:
532
+ * Addr:
479
533
  * || 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
480
534
  * 0 || unused | M |--------K--------| Flags | FamID | SerVer | PreambleInts |
481
535
  * || 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 |
@@ -510,8 +564,6 @@ class kll_sketch {
510
564
  T* max_value_;
511
565
  bool is_level_zero_sorted_;
512
566
 
513
- friend class kll_quantile_calculator<T, C, A>;
514
-
515
567
  // for deserialization
516
568
  class item_deleter;
517
569
  class items_deleter;
@@ -530,15 +582,21 @@ class kll_sketch {
530
582
  uint8_t find_level_to_compact() const;
531
583
  void add_empty_top_level_to_completely_full_sketch();
532
584
  void sort_level_zero();
533
- std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> get_quantile_calculator();
585
+
586
+ template<bool inclusive>
534
587
  vector_d<A> get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const;
588
+ template<bool inclusive>
535
589
  void increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
536
590
  const T* split_points, uint32_t size, double* buckets) const;
591
+ template<bool inclusive>
537
592
  void increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
538
593
  const T* split_points, uint32_t size, double* buckets) const;
594
+
539
595
  template<typename O> void merge_higher_levels(O&& other, uint64_t final_n);
540
- void populate_work_arrays(const kll_sketch& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels);
541
- void populate_work_arrays(kll_sketch&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels);
596
+
597
+ template<typename FwdSk>
598
+ void populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels);
599
+
542
600
  void assert_correct_total_weight() const;
543
601
  uint32_t safe_level_size(uint8_t level) const;
544
602
  uint32_t get_num_retained_above_level_zero() const;
@@ -550,8 +608,9 @@ class kll_sketch {
550
608
 
551
609
  // implementations for floating point types
552
610
  template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
553
- static TT get_invalid_value() {
554
- return std::numeric_limits<TT>::quiet_NaN();
611
+ static const TT& get_invalid_value() {
612
+ static TT value = std::numeric_limits<TT>::quiet_NaN();
613
+ return value;
555
614
  }
556
615
 
557
616
  template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
@@ -561,8 +620,8 @@ class kll_sketch {
561
620
 
562
621
  // implementations for all other types
563
622
  template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
564
- static TT get_invalid_value() {
565
- throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of values");
623
+ static const TT& get_invalid_value() {
624
+ throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of value");
566
625
  }
567
626
 
568
627
  template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>