datasketches 0.2.3 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/README.md +8 -8
  4. data/ext/datasketches/kll_wrapper.cpp +7 -3
  5. data/ext/datasketches/theta_wrapper.cpp +20 -4
  6. data/lib/datasketches/version.rb +1 -1
  7. data/vendor/datasketches-cpp/CMakeLists.txt +25 -5
  8. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  9. data/vendor/datasketches-cpp/NOTICE +6 -5
  10. data/vendor/datasketches-cpp/README.md +76 -9
  11. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  12. data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
  13. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  14. data/vendor/datasketches-cpp/common/include/common_defs.hpp +14 -0
  15. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  16. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  17. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  18. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  19. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  20. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +3 -1
  22. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  24. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
  25. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  26. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  27. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  28. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  29. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  30. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +29 -11
  31. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  32. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  33. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  34. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  35. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  36. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  37. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  38. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  39. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  40. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  41. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  42. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  43. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  44. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  45. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  46. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  47. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  48. data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
  49. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  50. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +5 -2
  51. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +108 -41
  52. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +150 -132
  53. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +165 -31
  54. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  55. data/vendor/datasketches-cpp/pyproject.toml +1 -1
  56. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  57. data/vendor/datasketches-cpp/python/README.md +13 -9
  58. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  59. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  60. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +48 -13
  61. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  62. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  63. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  64. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +1 -0
  65. data/vendor/datasketches-cpp/python/tests/kll_test.py +10 -4
  66. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  67. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  68. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +656 -0
  69. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1373 -0
  70. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  71. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  72. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  73. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  74. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  75. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  76. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  77. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  78. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  79. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  80. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  81. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +975 -0
  82. data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
  83. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  84. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +6 -0
  85. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +30 -2
  86. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +73 -23
  87. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +95 -63
  88. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +74 -3
  89. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  90. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +44 -7
  91. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +44 -33
  92. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
  94. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  95. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  96. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  97. data/vendor/datasketches-cpp/setup.py +1 -1
  98. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  99. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  100. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
  101. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  102. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
  103. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
  104. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  105. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
  106. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  107. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  108. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
  109. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  110. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -4
  111. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +34 -9
  112. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  113. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  114. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  115. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  116. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  117. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  118. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  119. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  120. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
  121. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
  122. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
  123. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  124. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  125. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  126. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  127. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  128. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
  129. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
  130. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  131. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  132. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  133. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  134. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  135. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  136. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
  137. metadata +33 -12
  138. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  139. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  140. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  141. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
  142. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -23,7 +23,10 @@
23
23
  #include <iostream>
24
24
  #include <iomanip>
25
25
  #include <sstream>
26
+ #include <stdexcept>
26
27
 
28
+ #include "conditional_forward.hpp"
29
+ #include "count_zeros.hpp"
27
30
  #include "memory_operations.hpp"
28
31
  #include "kll_helper.hpp"
29
32
 
@@ -67,7 +70,7 @@ max_value_(nullptr),
67
70
  is_level_zero_sorted_(other.is_level_zero_sorted_)
68
71
  {
69
72
  items_ = allocator_.allocate(items_size_);
70
- std::copy(&other.items_[levels_[0]], &other.items_[levels_[num_levels_]], &items_[levels_[0]]);
73
+ for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
71
74
  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
72
75
  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
73
76
  }
@@ -146,19 +149,39 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
146
149
  }
147
150
 
148
151
  template<typename T, typename C, typename S, typename A>
149
- void kll_sketch<T, C, S, A>::update(const T& value) {
150
- if (!check_update_value(value)) { return; }
151
- update_min_max(value);
152
- const uint32_t index = internal_update();
153
- new (&items_[index]) T(value);
152
+ template<typename TT, typename CC, typename SS, typename AA>
153
+ kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator):
154
+ allocator_(allocator),
155
+ k_(other.k_),
156
+ m_(other.m_),
157
+ min_k_(other.min_k_),
158
+ n_(other.n_),
159
+ num_levels_(other.num_levels_),
160
+ levels_(other.levels_, allocator_),
161
+ items_(nullptr),
162
+ items_size_(other.items_size_),
163
+ min_value_(nullptr),
164
+ max_value_(nullptr),
165
+ is_level_zero_sorted_(other.is_level_zero_sorted_)
166
+ {
167
+ static_assert(
168
+ std::is_constructible<T, TT>::value,
169
+ "Type converting constructor requires new type to be constructible from existing type"
170
+ );
171
+ items_ = allocator_.allocate(items_size_);
172
+ for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
173
+ if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
174
+ if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
175
+ check_sorting();
154
176
  }
155
177
 
156
178
  template<typename T, typename C, typename S, typename A>
157
- void kll_sketch<T, C, S, A>::update(T&& value) {
179
+ template<typename FwdT>
180
+ void kll_sketch<T, C, S, A>::update(FwdT&& value) {
158
181
  if (!check_update_value(value)) { return; }
159
182
  update_min_max(value);
160
183
  const uint32_t index = internal_update();
161
- new (&items_[index]) T(std::move(value));
184
+ new (&items_[index]) T(std::forward<FwdT>(value));
162
185
  }
163
186
 
164
187
  template<typename T, typename C, typename S, typename A>
@@ -181,22 +204,23 @@ uint32_t kll_sketch<T, C, S, A>::internal_update() {
181
204
  }
182
205
 
183
206
  template<typename T, typename C, typename S, typename A>
184
- void kll_sketch<T, C, S, A>::merge(const kll_sketch& other) {
207
+ template<typename FwdSk>
208
+ void kll_sketch<T, C, S, A>::merge(FwdSk&& other) {
185
209
  if (other.is_empty()) return;
186
210
  if (m_ != other.m_) {
187
211
  throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
188
212
  }
189
213
  if (is_empty()) {
190
- min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
191
- max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
214
+ min_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_value_));
215
+ max_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_value_));
192
216
  } else {
193
- if (C()(*other.min_value_, *min_value_)) *min_value_ = *other.min_value_;
194
- if (C()(*max_value_, *other.max_value_)) *max_value_ = *other.max_value_;
217
+ if (C()(*other.min_value_, *min_value_)) *min_value_ = conditional_forward<FwdSk>(*other.min_value_);
218
+ if (C()(*max_value_, *other.max_value_)) *max_value_ = conditional_forward<FwdSk>(*other.max_value_);
195
219
  }
196
220
  const uint64_t final_n = n_ + other.n_;
197
221
  for (uint32_t i = other.levels_[0]; i < other.levels_[1]; i++) {
198
222
  const uint32_t index = internal_update();
199
- new (&items_[index]) T(other.items_[i]);
223
+ new (&items_[index]) T(conditional_forward<FwdSk>(other.items_[i]));
200
224
  }
201
225
  if (other.num_levels_ >= 2) merge_higher_levels(other, final_n);
202
226
  n_ = final_n;
@@ -204,30 +228,6 @@ void kll_sketch<T, C, S, A>::merge(const kll_sketch& other) {
204
228
  assert_correct_total_weight();
205
229
  }
206
230
 
207
- template<typename T, typename C, typename S, typename A>
208
- void kll_sketch<T, C, S, A>::merge(kll_sketch&& other) {
209
- if (other.is_empty()) return;
210
- if (m_ != other.m_) {
211
- throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
212
- }
213
- if (is_empty()) {
214
- min_value_ = new (allocator_.allocate(1)) T(std::move(*other.min_value_));
215
- max_value_ = new (allocator_.allocate(1)) T(std::move(*other.max_value_));
216
- } else {
217
- if (C()(*other.min_value_, *min_value_)) *min_value_ = std::move(*other.min_value_);
218
- if (C()(*max_value_, *other.max_value_)) *max_value_ = std::move(*other.max_value_);
219
- }
220
- const uint64_t final_n = n_ + other.n_;
221
- for (uint32_t i = other.levels_[0]; i < other.levels_[1]; i++) {
222
- const uint32_t index = internal_update();
223
- new (&items_[index]) T(std::move(other.items_[i]));
224
- }
225
- if (other.num_levels_ >= 2) merge_higher_levels(std::forward<kll_sketch>(other), final_n);
226
- n_ = final_n;
227
- if (other.is_estimation_mode()) min_k_ = std::min(min_k_, other.min_k_);
228
- assert_correct_total_weight();
229
- }
230
-
231
231
  template<typename T, typename C, typename S, typename A>
232
232
  bool kll_sketch<T, C, S, A>::is_empty() const {
233
233
  return n_ == 0;
@@ -266,43 +266,49 @@ T kll_sketch<T, C, S, A>::get_max_value() const {
266
266
  }
267
267
 
268
268
  template<typename T, typename C, typename S, typename A>
269
- T kll_sketch<T, C, S, A>::get_quantile(double fraction) const {
269
+ C kll_sketch<T, C, S, A>::get_comparator() const {
270
+ return C();
271
+ }
272
+
273
+ template<typename T, typename C, typename S, typename A>
274
+ template<bool inclusive>
275
+ auto kll_sketch<T, C, S, A>::get_quantile(double rank) const -> quantile_return_type {
270
276
  if (is_empty()) return get_invalid_value();
271
- if (fraction == 0.0) return *min_value_;
272
- if (fraction == 1.0) return *max_value_;
273
- if ((fraction < 0.0) || (fraction > 1.0)) {
277
+ if (rank == 0.0) return *min_value_;
278
+ if (rank == 1.0) return *max_value_;
279
+ if ((rank < 0.0) || (rank > 1.0)) {
274
280
  throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
275
281
  }
276
- // has side effect of sorting level zero if needed
277
- auto quantile_calculator(const_cast<kll_sketch*>(this)->get_quantile_calculator());
278
- return quantile_calculator->get_quantile(fraction);
282
+ // may have a side effect of sorting level zero if needed
283
+ return get_sorted_view<inclusive>(true).get_quantile(rank);
279
284
  }
280
285
 
281
286
  template<typename T, typename C, typename S, typename A>
282
- std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* fractions, uint32_t size) const {
287
+ template<bool inclusive>
288
+ std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* ranks, uint32_t size) const {
283
289
  std::vector<T, A> quantiles(allocator_);
284
290
  if (is_empty()) return quantiles;
285
- std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator;
286
291
  quantiles.reserve(size);
292
+
293
+ // may have a side effect of sorting level zero if needed
294
+ auto view = get_sorted_view<inclusive>(true);
295
+
287
296
  for (uint32_t i = 0; i < size; i++) {
288
- const double fraction = fractions[i];
289
- if ((fraction < 0.0) || (fraction > 1.0)) {
297
+ const double rank = ranks[i];
298
+ if ((rank < 0.0) || (rank > 1.0)) {
290
299
  throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
291
300
  }
292
- if (fraction == 0.0) quantiles.push_back(*min_value_);
293
- else if (fraction == 1.0) quantiles.push_back(*max_value_);
301
+ else if (rank == 0.0) quantiles.push_back(*min_value_);
302
+ else if (rank == 1.0) quantiles.push_back(*max_value_);
294
303
  else {
295
- if (!quantile_calculator) {
296
- // has side effect of sorting level zero if needed
297
- quantile_calculator = const_cast<kll_sketch*>(this)->get_quantile_calculator();
298
- }
299
- quantiles.push_back(quantile_calculator->get_quantile(fraction));
304
+ quantiles.push_back(view.get_quantile(rank));
300
305
  }
301
306
  }
302
307
  return quantiles;
303
308
  }
304
309
 
305
310
  template<typename T, typename C, typename S, typename A>
311
+ template<bool inclusive>
306
312
  std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(uint32_t num) const {
307
313
  if (is_empty()) return std::vector<T, A>(allocator_);
308
314
  if (num == 0) {
@@ -316,20 +322,21 @@ std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(uint32_t num) const {
316
322
  if (num > 1) {
317
323
  fractions[num - 1] = 1.0;
318
324
  }
319
- return get_quantiles(fractions.data(), num);
325
+ return get_quantiles<inclusive>(fractions.data(), num);
320
326
  }
321
327
 
322
328
  template<typename T, typename C, typename S, typename A>
329
+ template<bool inclusive>
323
330
  double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
324
331
  if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
325
332
  uint8_t level = 0;
326
333
  uint64_t weight = 1;
327
334
  uint64_t total = 0;
328
335
  while (level < num_levels_) {
329
- const auto from_index(levels_[level]);
330
- const auto to_index(levels_[level + 1]); // exclusive
336
+ const auto from_index = levels_[level];
337
+ const auto to_index = levels_[level + 1]; // exclusive
331
338
  for (uint32_t i = from_index; i < to_index; i++) {
332
- if (C()(items_[i], value)) {
339
+ if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
333
340
  total += weight;
334
341
  } else if ((level > 0) || is_level_zero_sorted_) {
335
342
  break; // levels above 0 are sorted, no point comparing further
@@ -342,13 +349,15 @@ double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
342
349
  }
343
350
 
344
351
  template<typename T, typename C, typename S, typename A>
352
+ template<bool inclusive>
345
353
  vector_d<A> kll_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const {
346
- return get_PMF_or_CDF(split_points, size, false);
354
+ return get_PMF_or_CDF<inclusive>(split_points, size, false);
347
355
  }
348
356
 
349
357
  template<typename T, typename C, typename S, typename A>
358
+ template<bool inclusive>
350
359
  vector_d<A> kll_sketch<T, C, S, A>::get_CDF(const T* split_points, uint32_t size) const {
351
- return get_PMF_or_CDF(split_points, size, true);
360
+ return get_PMF_or_CDF<inclusive>(split_points, size, true);
352
361
  }
353
362
 
354
363
  template<typename T, typename C, typename S, typename A>
@@ -358,8 +367,8 @@ double kll_sketch<T, C, S, A>::get_normalized_rank_error(bool pmf) const {
358
367
 
359
368
  // implementation for fixed-size arithmetic types (integral and floating point)
360
369
  template<typename T, typename C, typename S, typename A>
361
- template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
362
- size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes() const {
370
+ template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
371
+ size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe&) const {
363
372
  if (is_empty()) { return EMPTY_SIZE_BYTES; }
364
373
  if (num_levels_ == 1 && get_num_retained() == 1) {
365
374
  return DATA_START_SINGLE_ITEM + sizeof(TT);
@@ -370,17 +379,17 @@ size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes() const {
370
379
 
371
380
  // implementation for all other types
372
381
  template<typename T, typename C, typename S, typename A>
373
- template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
374
- size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes() const {
382
+ template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
383
+ size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
375
384
  if (is_empty()) { return EMPTY_SIZE_BYTES; }
376
385
  if (num_levels_ == 1 && get_num_retained() == 1) {
377
- return DATA_START_SINGLE_ITEM + S().size_of_item(items_[levels_[0]]);
386
+ return DATA_START_SINGLE_ITEM + sd.size_of_item(items_[levels_[0]]);
378
387
  }
379
388
  // the last integer in the levels_ array is not serialized because it can be derived
380
389
  size_t size = DATA_START + num_levels_ * sizeof(uint32_t);
381
- size += S().size_of_item(*min_value_);
382
- size += S().size_of_item(*max_value_);
383
- for (auto it: *this) size += S().size_of_item(it.first);
390
+ size += sd.size_of_item(*min_value_);
391
+ size += sd.size_of_item(*max_value_);
392
+ for (auto it: *this) size += sd.size_of_item(it.first);
384
393
  return size;
385
394
  }
386
395
 
@@ -405,7 +414,8 @@ size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_
405
414
  }
406
415
 
407
416
  template<typename T, typename C, typename S, typename A>
408
- void kll_sketch<T, C, S, A>::serialize(std::ostream& os) const {
417
+ template<typename SerDe>
418
+ void kll_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
409
419
  const bool is_single_item = n_ == 1;
410
420
  const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
411
421
  write(os, preamble_ints);
@@ -430,16 +440,17 @@ void kll_sketch<T, C, S, A>::serialize(std::ostream& os) const {
430
440
  write(os, num_levels_);
431
441
  write(os, unused);
432
442
  write(os, levels_.data(), sizeof(levels_[0]) * num_levels_);
433
- S().serialize(os, min_value_, 1);
434
- S().serialize(os, max_value_, 1);
443
+ sd.serialize(os, min_value_, 1);
444
+ sd.serialize(os, max_value_, 1);
435
445
  }
436
- S().serialize(os, &items_[levels_[0]], get_num_retained());
446
+ sd.serialize(os, &items_[levels_[0]], get_num_retained());
437
447
  }
438
448
 
439
449
  template<typename T, typename C, typename S, typename A>
440
- vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const {
450
+ template<typename SerDe>
451
+ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
441
452
  const bool is_single_item = n_ == 1;
442
- const size_t size = header_size_bytes + get_serialized_size_bytes();
453
+ const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
443
454
  vector_u8<A> bytes(size, 0, allocator_);
444
455
  uint8_t* ptr = bytes.data() + header_size_bytes;
445
456
  const uint8_t* end_ptr = ptr + size;
@@ -465,11 +476,11 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const
465
476
  ptr += copy_to_mem(num_levels_, ptr);
466
477
  ptr += sizeof(uint8_t); // unused
467
478
  ptr += copy_to_mem(levels_.data(), ptr, sizeof(levels_[0]) * num_levels_);
468
- ptr += S().serialize(ptr, end_ptr - ptr, min_value_, 1);
469
- ptr += S().serialize(ptr, end_ptr - ptr, max_value_, 1);
479
+ ptr += sd.serialize(ptr, end_ptr - ptr, min_value_, 1);
480
+ ptr += sd.serialize(ptr, end_ptr - ptr, max_value_, 1);
470
481
  }
471
482
  const size_t bytes_remaining = end_ptr - ptr;
472
- ptr += S().serialize(ptr, bytes_remaining, &items_[levels_[0]], get_num_retained());
483
+ ptr += sd.serialize(ptr, bytes_remaining, &items_[levels_[0]], get_num_retained());
473
484
  }
474
485
  const size_t delta = ptr - bytes.data();
475
486
  if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
@@ -478,6 +489,12 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const
478
489
 
479
490
  template<typename T, typename C, typename S, typename A>
480
491
  kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
492
+ return deserialize(is, S(), allocator);
493
+ }
494
+
495
+ template<typename T, typename C, typename S, typename A>
496
+ template<typename SerDe>
497
+ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
481
498
  const auto preamble_ints = read<uint8_t>(is);
482
499
  const auto serial_version = read<uint8_t>(is);
483
500
  const auto family_id = read<uint8_t>(is);
@@ -525,17 +542,17 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
525
542
  std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
526
543
  std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
527
544
  if (!is_single_item) {
528
- S().deserialize(is, min_value_buffer.get(), 1);
545
+ sd.deserialize(is, min_value_buffer.get(), 1);
529
546
  // serde call did not throw, repackage with destrtuctor
530
547
  min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
531
- S().deserialize(is, max_value_buffer.get(), 1);
548
+ sd.deserialize(is, max_value_buffer.get(), 1);
532
549
  // serde call did not throw, repackage with destrtuctor
533
550
  max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
534
551
  }
535
552
  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
536
553
  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
537
554
  const auto num_items = levels[num_levels] - levels[0];
538
- S().deserialize(is, &items_buffer.get()[levels[0]], num_items);
555
+ sd.deserialize(is, &items_buffer.get()[levels[0]], num_items);
539
556
  // serde call did not throw, repackage with destrtuctors
540
557
  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
541
558
  const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
@@ -555,6 +572,12 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
555
572
 
556
573
  template<typename T, typename C, typename S, typename A>
557
574
  kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
575
+ return deserialize(bytes, size, S(), allocator);
576
+ }
577
+
578
+ template<typename T, typename C, typename S, typename A>
579
+ template<typename SerDe>
580
+ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
558
581
  ensure_minimum_memory(size, 8);
559
582
  const char* ptr = static_cast<const char*>(bytes);
560
583
  uint8_t preamble_ints;
@@ -611,17 +634,17 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
611
634
  std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
612
635
  std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
613
636
  if (!is_single_item) {
614
- ptr += S().deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
637
+ ptr += sd.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
615
638
  // serde call did not throw, repackage with destrtuctor
616
639
  min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
617
- ptr += S().deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
640
+ ptr += sd.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
618
641
  // serde call did not throw, repackage with destrtuctor
619
642
  max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
620
643
  }
621
644
  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
622
645
  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
623
646
  const auto num_items = levels[num_levels] - levels[0];
624
- ptr += S().deserialize(ptr, end_ptr - ptr, &items_buffer.get()[levels[0]], num_items);
647
+ ptr += sd.deserialize(ptr, end_ptr - ptr, &items_buffer.get()[levels[0]], num_items);
625
648
  // serde call did not throw, repackage with destrtuctors
626
649
  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
627
650
  const size_t delta = ptr - static_cast<const char*>(bytes);
@@ -699,7 +722,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
699
722
  // level zero might not be sorted, so we must sort it if we wish to compact it
700
723
  // sort_level_zero() is not used here because of the adjustment for odd number of items
701
724
  if ((level == 0) && !is_level_zero_sorted_) {
702
- std::sort(&items_[adj_beg], &items_[adj_beg + adj_pop], C());
725
+ std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, C());
703
726
  }
704
727
  if (pop_above == 0) {
705
728
  kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
@@ -722,7 +745,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
722
745
  // so that the freed-up space can be used by level zero
723
746
  if (level > 0) {
724
747
  const uint32_t amount = raw_beg - levels_[0];
725
- std::move_backward(&items_[levels_[0]], &items_[levels_[0] + amount], &items_[levels_[0] + half_adj_pop + amount]);
748
+ std::move_backward(items_ + levels_[0], items_ + levels_[0] + amount, items_ + levels_[0] + half_adj_pop + amount);
726
749
  for (uint8_t lvl = 0; lvl < level; lvl++) levels_[lvl] += half_adj_pop;
727
750
  }
728
751
  for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
@@ -780,24 +803,39 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
780
803
  template<typename T, typename C, typename S, typename A>
781
804
  void kll_sketch<T, C, S, A>::sort_level_zero() {
782
805
  if (!is_level_zero_sorted_) {
783
- std::sort(&items_[levels_[0]], &items_[levels_[1]], C());
806
+ std::sort(items_ + levels_[0], items_ + levels_[1], C());
784
807
  is_level_zero_sorted_ = true;
785
808
  }
786
809
  }
787
810
 
788
811
  template<typename T, typename C, typename S, typename A>
789
- std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> kll_sketch<T, C, S, A>::get_quantile_calculator() {
790
- sort_level_zero();
791
- using AllocCalc = typename std::allocator_traits<A>::template rebind_alloc<kll_quantile_calculator<T, C, A>>;
792
- AllocCalc alloc(allocator_);
793
- std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator(
794
- new (alloc.allocate(1)) kll_quantile_calculator<T, C, A>(*this),
795
- [&alloc](kll_quantile_calculator<T, C, A>* ptr){ ptr->~kll_quantile_calculator<T, C, A>(); alloc.deallocate(ptr, 1); }
796
- );
797
- return quantile_calculator;
812
+ void kll_sketch<T, C, S, A>::check_sorting() const {
813
+ // not checking level 0
814
+ for (uint8_t level = 1; level < num_levels_; ++level) {
815
+ const auto from = items_ + levels_[level];
816
+ const auto to = items_ + levels_[level + 1];
817
+ if (!std::is_sorted(from, to, C())) {
818
+ throw std::logic_error("levels must be sorted");
819
+ }
820
+ }
821
+ }
822
+
823
+ template<typename T, typename C, typename S, typename A>
824
+ template<bool inclusive>
825
+ quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
826
+ const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
827
+ quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
828
+ for (uint8_t level = 0; level < num_levels_; ++level) {
829
+ const auto from = items_ + levels_[level];
830
+ const auto to = items_ + levels_[level + 1]; // exclusive
831
+ view.add(from, to, 1 << level);
832
+ }
833
+ if (cumulative) view.template convert_to_cummulative<inclusive>();
834
+ return view;
798
835
  }
799
836
 
800
837
  template<typename T, typename C, typename S, typename A>
838
+ template<bool inclusive>
801
839
  vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const {
802
840
  if (is_empty()) return vector_d<A>(allocator_);
803
841
  kll_helper::validate_values<T, C>(split_points, size);
@@ -808,9 +846,9 @@ vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32
808
846
  const auto from_index = levels_[level];
809
847
  const auto to_index = levels_[level + 1]; // exclusive
810
848
  if ((level == 0) && !is_level_zero_sorted_) {
811
- increment_buckets_unsorted_level(from_index, to_index, weight, split_points, size, buckets.data());
849
+ increment_buckets_unsorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
812
850
  } else {
813
- increment_buckets_sorted_level(from_index, to_index, weight, split_points, size, buckets.data());
851
+ increment_buckets_sorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
814
852
  }
815
853
  level++;
816
854
  weight *= 2;
@@ -831,13 +869,14 @@ vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32
831
869
  }
832
870
 
833
871
  template<typename T, typename C, typename S, typename A>
872
+ template<bool inclusive>
834
873
  void kll_sketch<T, C, S, A>::increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
835
874
  const T* split_points, uint32_t size, double* buckets) const
836
875
  {
837
876
  for (uint32_t i = from_index; i < to_index; i++) {
838
877
  uint32_t j;
839
878
  for (j = 0; j < size; j++) {
840
- if (C()(items_[i], split_points[j])) {
879
+ if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
841
880
  break;
842
881
  }
843
882
  }
@@ -846,13 +885,14 @@ void kll_sketch<T, C, S, A>::increment_buckets_unsorted_level(uint32_t from_inde
846
885
  }
847
886
 
848
887
  template<typename T, typename C, typename S, typename A>
888
+ template<bool inclusive>
849
889
  void kll_sketch<T, C, S, A>::increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
850
890
  const T* split_points, uint32_t size, double* buckets) const
851
891
  {
852
892
  uint32_t i = from_index;
853
893
  uint32_t j = 0;
854
894
  while ((i < to_index) && (j < size)) {
855
- if (C()(items_[i], split_points[j])) {
895
+ if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
856
896
  buckets[j] += weight; // this sample goes into this bucket
857
897
  i++; // move on to next sample and see whether it also goes into this bucket
858
898
  } else {
@@ -910,9 +950,9 @@ void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
910
950
  }
911
951
 
912
952
  // this leaves items_ uninitialized (all objects moved out and destroyed)
913
- // this version copies objects from the incoming sketch
914
953
  template<typename T, typename C, typename S, typename A>
915
- void kll_sketch<T, C, S, A>::populate_work_arrays(const kll_sketch& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
954
+ template<typename FwdSk>
955
+ void kll_sketch<T, C, S, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
916
956
  worklevels[0] = 0;
917
957
 
918
958
  // the level zero data from "other" was already inserted into "this"
@@ -927,32 +967,9 @@ void kll_sketch<T, C, S, A>::populate_work_arrays(const kll_sketch& other, T* wo
927
967
  if ((self_pop > 0) && (other_pop == 0)) {
928
968
  kll_helper::move_construct<T>(items_, levels_[lvl], levels_[lvl] + self_pop, workbuf, worklevels[lvl], true);
929
969
  } else if ((self_pop == 0) && (other_pop > 0)) {
930
- kll_helper::copy_construct<T>(other.items_, other.levels_[lvl], other.levels_[lvl] + other_pop, workbuf, worklevels[lvl]);
931
- } else if ((self_pop > 0) && (other_pop > 0)) {
932
- kll_helper::merge_sorted_arrays<T, C>(items_, levels_[lvl], self_pop, other.items_, other.levels_[lvl], other_pop, workbuf, worklevels[lvl]);
933
- }
934
- }
935
- }
936
-
937
- // this leaves items_ uninitialized (all objects moved out and destroyed)
938
- // this version moves objects from the incoming sketch
939
- template<typename T, typename C, typename S, typename A>
940
- void kll_sketch<T, C, S, A>::populate_work_arrays(kll_sketch&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
941
- worklevels[0] = 0;
942
-
943
- // the level zero data from "other" was already inserted into "this"
944
- kll_helper::move_construct<T>(items_, levels_[0], levels_[1], workbuf, 0, true);
945
- worklevels[1] = safe_level_size(0);
946
-
947
- for (uint8_t lvl = 1; lvl < provisional_num_levels; lvl++) {
948
- const uint32_t self_pop = safe_level_size(lvl);
949
- const uint32_t other_pop = other.safe_level_size(lvl);
950
- worklevels[lvl + 1] = worklevels[lvl] + self_pop + other_pop;
951
-
952
- if ((self_pop > 0) && (other_pop == 0)) {
953
- kll_helper::move_construct<T>(items_, levels_[lvl], levels_[lvl] + self_pop, workbuf, worklevels[lvl], true);
954
- } else if ((self_pop == 0) && (other_pop > 0)) {
955
- kll_helper::move_construct<T>(other.items_, other.levels_[lvl], other.levels_[lvl] + other_pop, workbuf, worklevels[lvl], false);
970
+ for (auto i = other.levels_[lvl], j = worklevels[lvl]; i < other.levels_[lvl] + other_pop; ++i, ++j) {
971
+ new (&workbuf[j]) T(conditional_forward<FwdSk>(other.items_[i]));
972
+ }
956
973
  } else if ((self_pop > 0) && (other_pop > 0)) {
957
974
  kll_helper::merge_sorted_arrays<T, C>(items_, levels_[lvl], self_pop, other.items_, other.levels_[lvl], other_pop, workbuf, worklevels[lvl]);
958
975
  }
@@ -1023,7 +1040,9 @@ void kll_sketch<T, C, S, A>::check_family_id(uint8_t family_id) {
1023
1040
 
1024
1041
  template <typename T, typename C, typename S, typename A>
1025
1042
  string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
1026
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
1043
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
1044
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
1045
+ std::ostringstream os;
1027
1046
  os << "### KLL sketch summary:" << std::endl;
1028
1047
  os << " K : " << k_ << std::endl;
1029
1048
  os << " min K : " << min_k_ << std::endl;
@@ -1037,7 +1056,6 @@ string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
1037
1056
  os << " Sorted : " << (is_level_zero_sorted_ ? "true" : "false") << std::endl;
1038
1057
  os << " Capacity items : " << items_size_ << std::endl;
1039
1058
  os << " Retained items : " << get_num_retained() << std::endl;
1040
- os << " Storage bytes : " << get_serialized_size_bytes() << std::endl;
1041
1059
  if (!is_empty()) {
1042
1060
  os << " Min value : " << *min_value_ << std::endl;
1043
1061
  os << " Max value : " << *max_value_ << std::endl;
@@ -1069,7 +1087,7 @@ string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
1069
1087
  }
1070
1088
  os << "### End sketch data" << std::endl;
1071
1089
  }
1072
- return os.str();
1090
+ return string<A>(os.str().c_str(), allocator_);
1073
1091
  }
1074
1092
 
1075
1093
  template <typename T, typename C, typename S, typename A>