datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -26,24 +26,24 @@
26
26
  #include <iomanip>
27
27
  #include <sstream>
28
28
 
29
- #include "common_defs.hpp"
30
29
  #include "count_zeros.hpp"
31
30
  #include "conditional_forward.hpp"
32
- #include "quantiles_sketch.hpp"
33
31
 
34
32
  namespace datasketches {
35
33
 
36
34
  template<typename T, typename C, typename A>
37
- quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, const A& allocator):
35
+ quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, const C& comparator, const A& allocator):
36
+ comparator_(comparator),
38
37
  allocator_(allocator),
38
+ is_base_buffer_sorted_(true),
39
39
  k_(k),
40
40
  n_(0),
41
41
  bit_pattern_(0),
42
42
  base_buffer_(allocator_),
43
43
  levels_(allocator_),
44
- min_value_(nullptr),
45
- max_value_(nullptr),
46
- is_sorted_(true)
44
+ min_item_(nullptr),
45
+ max_item_(nullptr),
46
+ sorted_view_(nullptr)
47
47
  {
48
48
  check_k(k_);
49
49
  base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k));
@@ -51,18 +51,20 @@ is_sorted_(true)
51
51
 
52
52
  template<typename T, typename C, typename A>
53
53
  quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch& other):
54
+ comparator_(other.comparator_),
54
55
  allocator_(other.allocator_),
56
+ is_base_buffer_sorted_(other.is_base_buffer_sorted_),
55
57
  k_(other.k_),
56
58
  n_(other.n_),
57
59
  bit_pattern_(other.bit_pattern_),
58
60
  base_buffer_(other.base_buffer_),
59
61
  levels_(other.levels_),
60
- min_value_(nullptr),
61
- max_value_(nullptr),
62
- is_sorted_(other.is_sorted_)
62
+ min_item_(nullptr),
63
+ max_item_(nullptr),
64
+ sorted_view_(nullptr)
63
65
  {
64
- if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
65
- if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
66
+ if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
67
+ if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
66
68
  for (size_t i = 0; i < levels_.size(); ++i) {
67
69
  if (levels_[i].capacity() != other.levels_[i].capacity()) {
68
70
  levels_[i].reserve(other.levels_[i].capacity());
@@ -72,63 +74,71 @@ is_sorted_(other.is_sorted_)
72
74
 
73
75
  template<typename T, typename C, typename A>
74
76
  quantiles_sketch<T, C, A>::quantiles_sketch(quantiles_sketch&& other) noexcept:
77
+ comparator_(other.comparator_),
75
78
  allocator_(other.allocator_),
79
+ is_base_buffer_sorted_(other.is_base_buffer_sorted_),
76
80
  k_(other.k_),
77
81
  n_(other.n_),
78
82
  bit_pattern_(other.bit_pattern_),
79
83
  base_buffer_(std::move(other.base_buffer_)),
80
84
  levels_(std::move(other.levels_)),
81
- min_value_(other.min_value_),
82
- max_value_(other.max_value_),
83
- is_sorted_(other.is_sorted_)
85
+ min_item_(other.min_item_),
86
+ max_item_(other.max_item_),
87
+ sorted_view_(nullptr)
84
88
  {
85
- other.min_value_ = nullptr;
86
- other.max_value_ = nullptr;
89
+ other.min_item_ = nullptr;
90
+ other.max_item_ = nullptr;
87
91
  }
88
92
 
89
93
  template<typename T, typename C, typename A>
90
94
  quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(const quantiles_sketch& other) {
91
95
  quantiles_sketch<T, C, A> copy(other);
96
+ std::swap(comparator_, copy.comparator_);
92
97
  std::swap(allocator_, copy.allocator_);
98
+ std::swap(is_base_buffer_sorted_, copy.is_base_buffer_sorted_);
93
99
  std::swap(k_, copy.k_);
94
100
  std::swap(n_, copy.n_);
95
101
  std::swap(bit_pattern_, copy.bit_pattern_);
96
102
  std::swap(base_buffer_, copy.base_buffer_);
97
103
  std::swap(levels_, copy.levels_);
98
- std::swap(min_value_, copy.min_value_);
99
- std::swap(max_value_, copy.max_value_);
100
- std::swap(is_sorted_, copy.is_sorted_);
104
+ std::swap(min_item_, copy.min_item_);
105
+ std::swap(max_item_, copy.max_item_);
106
+ reset_sorted_view();
101
107
  return *this;
102
108
  }
103
109
 
104
110
  template<typename T, typename C, typename A>
105
111
  quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(quantiles_sketch&& other) noexcept {
112
+ std::swap(comparator_, other.comparator_);
106
113
  std::swap(allocator_, other.allocator_);
114
+ std::swap(is_base_buffer_sorted_, other.is_base_buffer_sorted_);
107
115
  std::swap(k_, other.k_);
108
116
  std::swap(n_, other.n_);
109
117
  std::swap(bit_pattern_, other.bit_pattern_);
110
118
  std::swap(base_buffer_, other.base_buffer_);
111
119
  std::swap(levels_, other.levels_);
112
- std::swap(min_value_, other.min_value_);
113
- std::swap(max_value_, other.max_value_);
114
- std::swap(is_sorted_, other.is_sorted_);
120
+ std::swap(min_item_, other.min_item_);
121
+ std::swap(max_item_, other.max_item_);
122
+ reset_sorted_view();
115
123
  return *this;
116
124
  }
117
125
 
118
126
  template<typename T, typename C, typename A>
119
127
  quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
120
128
  Level&& base_buffer, VectorLevels&& levels,
121
- std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value,
122
- bool is_sorted, const A& allocator) :
129
+ std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
130
+ bool is_sorted, const C& comparator, const A& allocator):
131
+ comparator_(comparator),
123
132
  allocator_(allocator),
133
+ is_base_buffer_sorted_(is_sorted),
124
134
  k_(k),
125
135
  n_(n),
126
136
  bit_pattern_(bit_pattern),
127
137
  base_buffer_(std::move(base_buffer)),
128
138
  levels_(std::move(levels)),
129
- min_value_(min_value.release()),
130
- max_value_(max_value.release()),
131
- is_sorted_(is_sorted)
139
+ min_item_(min_item.release()),
140
+ max_item_(max_item.release()),
141
+ sorted_view_(nullptr)
132
142
  {
133
143
  uint32_t item_count = base_buffer_.size();
134
144
  for (Level& lvl : levels_) {
@@ -140,16 +150,19 @@ is_sorted_(is_sorted)
140
150
 
141
151
  template<typename T, typename C, typename A>
142
152
  template<typename From, typename FC, typename FA>
143
- quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const A& allocator) :
153
+ quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other,
154
+ const C& comparator, const A& allocator):
155
+ comparator_(comparator),
144
156
  allocator_(allocator),
157
+ is_base_buffer_sorted_(false),
145
158
  k_(other.get_k()),
146
159
  n_(other.get_n()),
147
160
  bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
148
161
  base_buffer_(allocator),
149
162
  levels_(allocator),
150
- min_value_(nullptr),
151
- max_value_(nullptr),
152
- is_sorted_(false)
163
+ min_item_(nullptr),
164
+ max_item_(nullptr),
165
+ sorted_view_(nullptr)
153
166
  {
154
167
  static_assert(std::is_constructible<T, From>::value,
155
168
  "Type converting constructor requires new type to be constructible from existing type");
@@ -157,8 +170,8 @@ is_sorted_(false)
157
170
  base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
158
171
 
159
172
  if (!other.is_empty()) {
160
- min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
161
- max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
173
+ min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
174
+ max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
162
175
 
163
176
  // reserve space in levels
164
177
  const uint8_t num_levels = compute_levels_needed(k_, n_);
@@ -189,7 +202,7 @@ is_sorted_(false)
189
202
  // validate that ordering within each level is preserved
190
203
  // base_buffer_ can be considered unsorted for this purpose
191
204
  for (int i = 0; i < num_levels; ++i) {
192
- if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), C())) {
205
+ if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), comparator_)) {
193
206
  throw std::logic_error("Copy construction across types produces invalid sorting");
194
207
  }
195
208
  }
@@ -199,40 +212,38 @@ is_sorted_(false)
199
212
 
200
213
  template<typename T, typename C, typename A>
201
214
  quantiles_sketch<T, C, A>::~quantiles_sketch() {
202
- if (min_value_ != nullptr) {
203
- min_value_->~T();
204
- allocator_.deallocate(min_value_, 1);
215
+ if (min_item_ != nullptr) {
216
+ min_item_->~T();
217
+ allocator_.deallocate(min_item_, 1);
205
218
  }
206
- if (max_value_ != nullptr) {
207
- max_value_->~T();
208
- allocator_.deallocate(max_value_, 1);
219
+ if (max_item_ != nullptr) {
220
+ max_item_->~T();
221
+ allocator_.deallocate(max_item_, 1);
209
222
  }
223
+ reset_sorted_view();
210
224
  }
211
225
 
212
226
  template<typename T, typename C, typename A>
213
227
  template<typename FwdT>
214
228
  void quantiles_sketch<T, C, A>::update(FwdT&& item) {
215
- if (!check_update_value(item)) { return; }
229
+ if (!check_update_item(item)) { return; }
216
230
  if (is_empty()) {
217
- min_value_ = new (allocator_.allocate(1)) T(item);
218
- max_value_ = new (allocator_.allocate(1)) T(item);
231
+ min_item_ = new (allocator_.allocate(1)) T(item);
232
+ max_item_ = new (allocator_.allocate(1)) T(item);
219
233
  } else {
220
- if (C()(item, *min_value_)) *min_value_ = item;
221
- if (C()(*max_value_, item)) *max_value_ = item;
234
+ if (comparator_(item, *min_item_)) *min_item_ = item;
235
+ if (comparator_(*max_item_, item)) *max_item_ = item;
222
236
  }
223
237
 
224
238
  // if exceed capacity, grow until size 2k -- assumes eager processing
225
- if (base_buffer_.size() + 1 > base_buffer_.capacity())
226
- grow_base_buffer();
239
+ if (base_buffer_.size() + 1 > base_buffer_.capacity()) grow_base_buffer();
227
240
 
228
241
  base_buffer_.push_back(std::forward<FwdT>(item));
229
242
  ++n_;
230
243
 
231
- if (base_buffer_.size() > 1)
232
- is_sorted_ = false;
233
-
234
- if (base_buffer_.size() == 2 * k_)
235
- process_full_base_buffer();
244
+ if (base_buffer_.size() > 1) is_base_buffer_sorted_ = false;
245
+ if (base_buffer_.size() == 2 * k_) process_full_base_buffer();
246
+ reset_sorted_view();
236
247
  }
237
248
 
238
249
  template<typename T, typename C, typename A>
@@ -245,10 +256,11 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
245
256
  for (auto item : other.base_buffer_) {
246
257
  update(conditional_forward<FwdSk>(item));
247
258
  }
248
- return; // we're done
259
+ reset_sorted_view();
260
+ return;
249
261
  }
250
262
 
251
- // we know other has data and is in estimation mode
263
+ // other has data and is in estimation mode
252
264
  if (is_estimation_mode()) {
253
265
  if (k_ == other.get_k()) {
254
266
  standard_merge(*this, other);
@@ -273,6 +285,7 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
273
285
  }
274
286
  *this = sk_copy;
275
287
  }
288
+ reset_sorted_view();
276
289
  }
277
290
 
278
291
  template<typename T, typename C, typename A>
@@ -286,8 +299,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
286
299
  write(os, family);
287
300
 
288
301
  // side-effect: sort base buffer since always compact
289
- // can't set is_sorted_ since const method
290
- std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
302
+ std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
303
+ const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
291
304
 
292
305
  // empty, ordered, compact are valid flags
293
306
  const uint8_t flags_byte(
@@ -304,8 +317,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
304
317
  write(os, n_);
305
318
 
306
319
  // min and max
307
- serde.serialize(os, min_value_, 1);
308
- serde.serialize(os, max_value_, 1);
320
+ serde.serialize(os, min_item_, 1);
321
+ serde.serialize(os, max_item_, 1);
309
322
 
310
323
  // base buffer items
311
324
  serde.serialize(os, base_buffer_.data(), static_cast<unsigned>(base_buffer_.size()));
@@ -334,8 +347,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
334
347
  ptr += copy_to_mem(family, ptr);
335
348
 
336
349
  // side-effect: sort base buffer since always compact
337
- // can't set is_sorted_ since const method
338
- std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
350
+ std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
351
+ const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
339
352
 
340
353
  // empty, ordered, compact are valid flags
341
354
  const uint8_t flags_byte(
@@ -352,8 +365,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
352
365
  ptr += copy_to_mem(n_, ptr);
353
366
 
354
367
  // min and max
355
- ptr += serde.serialize(ptr, end_ptr - ptr, min_value_, 1);
356
- ptr += serde.serialize(ptr, end_ptr - ptr, max_value_, 1);
368
+ ptr += serde.serialize(ptr, end_ptr - ptr, min_item_, 1);
369
+ ptr += serde.serialize(ptr, end_ptr - ptr, max_item_, 1);
357
370
 
358
371
  // base buffer items
359
372
  if (base_buffer_.size() > 0)
@@ -371,7 +384,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
371
384
 
372
385
  template<typename T, typename C, typename A>
373
386
  template<typename SerDe>
374
- auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde, const A &allocator) -> quantiles_sketch {
387
+ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde,
388
+ const C& comparator, const A &allocator) -> quantiles_sketch {
375
389
  const auto preamble_longs = read<uint8_t>(is);
376
390
  const auto serial_version = read<uint8_t>(is);
377
391
  const auto family_id = read<uint8_t>(is);
@@ -387,7 +401,7 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
387
401
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
388
402
  const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
389
403
  if (is_empty) {
390
- return quantiles_sketch(k, allocator);
404
+ return quantiles_sketch(k, comparator, allocator);
391
405
  }
392
406
 
393
407
  const auto items_seen = read<uint64_t>(is);
@@ -397,17 +411,17 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
397
411
 
398
412
  A alloc(allocator);
399
413
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
400
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
401
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
402
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
403
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
414
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
415
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
416
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
417
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
404
418
 
405
- serde.deserialize(is, min_value_buffer.get(), 1);
419
+ serde.deserialize(is, min_item_buffer.get(), 1);
406
420
  // serde call did not throw, repackage with destructor
407
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
408
- serde.deserialize(is, max_value_buffer.get(), 1);
421
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
422
+ serde.deserialize(is, max_item_buffer.get(), 1);
409
423
  // serde call did not throw, repackage with destructor
410
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
424
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
411
425
 
412
426
  if (serial_version == 1) {
413
427
  read<uint64_t>(is); // no longer used
@@ -449,7 +463,8 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
449
463
  }
450
464
 
451
465
  return quantiles_sketch(k, items_seen, bit_pattern,
452
- std::move(base_buffer), std::move(levels), std::move(min_value), std::move(max_value), is_sorted, allocator);
466
+ std::move(base_buffer), std::move(levels), std::move(min_item), std::move(max_item), is_sorted,
467
+ comparator, allocator);
453
468
  }
454
469
 
455
470
  template<typename T, typename C, typename A>
@@ -473,7 +488,8 @@ auto quantiles_sketch<T, C, A>::deserialize_array(std::istream& is, uint32_t num
473
488
 
474
489
  template<typename T, typename C, typename A>
475
490
  template<typename SerDe>
476
- auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& serde, const A &allocator) -> quantiles_sketch {
491
+ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& serde,
492
+ const C& comparator, const A &allocator) -> quantiles_sketch {
477
493
  ensure_minimum_memory(size, 8);
478
494
  const char* ptr = static_cast<const char*>(bytes);
479
495
  const char* end_ptr = static_cast<const char*>(bytes) + size;
@@ -498,7 +514,7 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
498
514
 
499
515
  const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
500
516
  if (is_empty) {
501
- return quantiles_sketch(k, allocator);
517
+ return quantiles_sketch(k, comparator, allocator);
502
518
  }
503
519
 
504
520
  ensure_minimum_memory(size, 16);
@@ -510,17 +526,17 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
510
526
 
511
527
  A alloc(allocator);
512
528
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
513
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
514
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
515
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
516
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
529
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
530
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
531
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
532
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
517
533
 
518
- ptr += serde.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
534
+ ptr += serde.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
519
535
  // serde call did not throw, repackage with destructor
520
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
521
- ptr += serde.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
536
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
537
+ ptr += serde.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
522
538
  // serde call did not throw, repackage with destructor
523
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
539
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
524
540
 
525
541
  if (serial_version == 1) {
526
542
  uint64_t unused_long;
@@ -567,7 +583,8 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
567
583
  }
568
584
 
569
585
  return quantiles_sketch(k, items_seen, bit_pattern,
570
- std::move(base_buffer_pair.first), std::move(levels), std::move(min_value), std::move(max_value), is_sorted, allocator);
586
+ std::move(base_buffer_pair.first), std::move(levels), std::move(min_item), std::move(max_item), is_sorted,
587
+ comparator, allocator);
571
588
  }
572
589
 
573
590
  template<typename T, typename C, typename A>
@@ -605,11 +622,11 @@ string<A> quantiles_sketch<T, C, A>::to_string(bool print_levels, bool print_ite
605
622
  os << " Empty : " << (is_empty() ? "true" : "false") << std::endl;
606
623
  os << " Estimation mode: " << (is_estimation_mode() ? "true" : "false") << std::endl;
607
624
  os << " Levels (w/o BB): " << levels_.size() << std::endl;
608
- os << " Used Levels : " << compute_valid_levels(bit_pattern_) << std::endl;
625
+ os << " Used Levels : " << count_valid_levels(bit_pattern_) << std::endl;
609
626
  os << " Retained items : " << get_num_retained() << std::endl;
610
627
  if (!is_empty()) {
611
- os << " Min value : " << *min_value_ << std::endl;
612
- os << " Max value : " << *max_value_ << std::endl;
628
+ os << " Min item : " << *min_item_ << std::endl;
629
+ os << " Max item : " << *max_item_ << std::endl;
613
630
  }
614
631
  os << "### End sketch summary" << std::endl;
615
632
 
@@ -667,20 +684,20 @@ uint32_t quantiles_sketch<T, C, A>::get_num_retained() const {
667
684
  }
668
685
 
669
686
  template<typename T, typename C, typename A>
670
- const T& quantiles_sketch<T, C, A>::get_min_value() const {
671
- if (is_empty()) return get_invalid_value();
672
- return *min_value_;
687
+ const T& quantiles_sketch<T, C, A>::get_min_item() const {
688
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
689
+ return *min_item_;
673
690
  }
674
691
 
675
692
  template<typename T, typename C, typename A>
676
- const T& quantiles_sketch<T, C, A>::get_max_value() const {
677
- if (is_empty()) return get_invalid_value();
678
- return *max_value_;
693
+ const T& quantiles_sketch<T, C, A>::get_max_item() const {
694
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
695
+ return *max_item_;
679
696
  }
680
697
 
681
698
  template<typename T, typename C, typename A>
682
699
  C quantiles_sketch<T, C, A>::get_comparator() const {
683
- return C();
700
+ return comparator_;
684
701
  }
685
702
 
686
703
  template<typename T, typename C, typename A>
@@ -702,8 +719,8 @@ template<typename SerDe, typename TT, typename std::enable_if<!std::is_arithmeti
702
719
  size_t quantiles_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& serde) const {
703
720
  if (is_empty()) { return EMPTY_SIZE_BYTES; }
704
721
  size_t size = DATA_START;
705
- size += serde.size_of_item(*min_value_);
706
- size += serde.size_of_item(*max_value_);
722
+ size += serde.size_of_item(*min_item_);
723
+ size += serde.size_of_item(*max_item_);
707
724
  for (auto it: *this) size += serde.size_of_item(it.first);
708
725
  return size;
709
726
  }
@@ -721,162 +738,121 @@ double quantiles_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool is_
721
738
  }
722
739
 
723
740
  template<typename T, typename C, typename A>
724
- template<bool inclusive>
725
- quantile_sketch_sorted_view<T, C, A> quantiles_sketch<T, C, A>::get_sorted_view(bool cumulative) const {
726
- // allow side-effect of sorting the base buffer; can't set the flag since
727
- // this is a const method
728
- if (!is_sorted_) {
729
- std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
741
+ quantiles_sorted_view<T, C, A> quantiles_sketch<T, C, A>::get_sorted_view() const {
742
+ // allow side-effect of sorting the base buffer
743
+ if (!is_base_buffer_sorted_) {
744
+ std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
745
+ const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
730
746
  }
731
- quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
747
+ quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
732
748
 
733
749
  uint64_t weight = 1;
734
750
  view.add(base_buffer_.begin(), base_buffer_.end(), weight);
735
- for (auto& level : levels_) {
751
+ for (const auto& level: levels_) {
736
752
  weight <<= 1;
737
753
  if (level.empty()) { continue; }
738
754
  view.add(level.begin(), level.end(), weight);
739
755
  }
740
756
 
741
- if (cumulative) view.template convert_to_cummulative<inclusive>();
757
+ view.convert_to_cummulative();
742
758
  return view;
743
759
  }
744
760
 
745
761
  template<typename T, typename C, typename A>
746
- template<bool inclusive>
747
- auto quantiles_sketch<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
748
- if (is_empty()) return get_invalid_value();
749
- if (rank == 0.0) return *min_value_;
750
- if (rank == 1.0) return *max_value_;
762
+ auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
763
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
751
764
  if ((rank < 0.0) || (rank > 1.0)) {
752
- throw std::invalid_argument("Rank cannot be less than zero or greater than 1.0");
765
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
753
766
  }
754
767
  // possible side-effect: sorting base buffer
755
- return get_sorted_view<inclusive>(true).get_quantile(rank);
768
+ setup_sorted_view();
769
+ return sorted_view_->get_quantile(rank, inclusive);
756
770
  }
757
771
 
758
772
  template<typename T, typename C, typename A>
759
- template<bool inclusive>
760
- std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size) const {
773
+ std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
774
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
761
775
  std::vector<T, A> quantiles(allocator_);
762
- if (is_empty()) return quantiles;
763
776
  quantiles.reserve(size);
764
777
 
765
778
  // possible side-effect: sorting base buffer
766
- auto view = get_sorted_view<inclusive>(true);
779
+ setup_sorted_view();
767
780
 
768
781
  for (uint32_t i = 0; i < size; ++i) {
769
782
  const double rank = ranks[i];
770
783
  if ((rank < 0.0) || (rank > 1.0)) {
771
- throw std::invalid_argument("rank cannot be less than zero or greater than 1.0");
772
- }
773
- if (rank == 0.0) quantiles.push_back(*min_value_);
774
- else if (rank == 1.0) quantiles.push_back(*max_value_);
775
- else {
776
- quantiles.push_back(view.get_quantile(rank));
784
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
777
785
  }
786
+ quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
778
787
  }
779
788
  return quantiles;
780
789
  }
781
790
 
782
791
  template<typename T, typename C, typename A>
783
- template<bool inclusive>
784
- std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num) const {
785
- if (is_empty()) return std::vector<T, A>(allocator_);
792
+ std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
793
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
786
794
  if (num == 0) {
787
795
  throw std::invalid_argument("num must be > 0");
788
796
  }
789
- vector_double fractions(num, 0, allocator_);
790
- fractions[0] = 0.0;
797
+ vector_double ranks(num, 0, allocator_);
798
+ ranks[0] = 0.0;
791
799
  for (size_t i = 1; i < num; i++) {
792
- fractions[i] = static_cast<double>(i) / (num - 1);
800
+ ranks[i] = static_cast<double>(i) / (num - 1);
793
801
  }
794
802
  if (num > 1) {
795
- fractions[num - 1] = 1.0;
803
+ ranks[num - 1] = 1.0;
796
804
  }
797
- return get_quantiles<inclusive>(fractions.data(), num);
805
+ return get_quantiles(ranks.data(), num, inclusive);
798
806
  }
799
807
 
800
808
  template<typename T, typename C, typename A>
801
- template<bool inclusive>
802
- double quantiles_sketch<T, C, A>::get_rank(const T& value) const {
803
- if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
804
- uint64_t weight = 1;
805
- uint64_t total = 0;
806
- for (const T &item: base_buffer_) {
807
- if (inclusive ? !C()(value, item) : C()(item, value))
808
- total += weight;
809
- }
810
-
811
- weight *= 2;
812
- for (uint8_t level = 0; level < levels_.size(); ++level, weight *= 2) {
813
- if (levels_[level].empty()) { continue; }
814
- const T* data = levels_[level].data();
815
- for (uint16_t i = 0; i < k_; ++i) {
816
- if (inclusive ? !C()(value, data[i]) : C()(data[i], value))
817
- total += weight;
818
- else
819
- break; // levels are sorted, no point comparing further
820
- }
821
- }
822
- return (double) total / n_;
809
+ double quantiles_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
810
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
811
+ setup_sorted_view();
812
+ return sorted_view_->get_rank(item, inclusive);
823
813
  }
824
814
 
825
815
  template<typename T, typename C, typename A>
826
- template<bool inclusive>
827
- auto quantiles_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size) const -> vector_double {
828
- auto buckets = get_CDF<inclusive>(split_points, size);
829
- if (is_empty()) return buckets;
830
- for (uint32_t i = size; i > 0; --i) {
831
- buckets[i] -= buckets[i - 1];
832
- }
833
- return buckets;
816
+ auto quantiles_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
817
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
818
+ setup_sorted_view();
819
+ return sorted_view_->get_PMF(split_points, size, inclusive);
834
820
  }
835
821
 
836
822
  template<typename T, typename C, typename A>
837
- template<bool inclusive>
838
- auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size) const -> vector_double {
839
- vector_double buckets(allocator_);
840
- if (is_empty()) return buckets;
841
- check_split_points(split_points, size);
842
- buckets.reserve(size + 1);
843
- for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank<inclusive>(split_points[i]));
844
- buckets.push_back(1);
845
- return buckets;
823
+ auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
824
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
825
+ setup_sorted_view();
826
+ return sorted_view_->get_CDF(split_points, size, inclusive);
846
827
  }
847
828
 
848
829
  template<typename T, typename C, typename A>
849
- uint32_t quantiles_sketch<T, C, A>::compute_retained_items(const uint16_t k, const uint64_t n) {
830
+ uint32_t quantiles_sketch<T, C, A>::compute_retained_items(uint16_t k, uint64_t n) {
850
831
  const uint32_t bb_count = compute_base_buffer_items(k, n);
851
832
  const uint64_t bit_pattern = compute_bit_pattern(k, n);
852
- const uint32_t valid_levels = compute_valid_levels(bit_pattern);
833
+ const uint32_t valid_levels = count_valid_levels(bit_pattern);
853
834
  return bb_count + (k * valid_levels);
854
835
  }
855
836
 
856
837
  template<typename T, typename C, typename A>
857
- uint32_t quantiles_sketch<T, C, A>::compute_base_buffer_items(const uint16_t k, const uint64_t n) {
838
+ uint32_t quantiles_sketch<T, C, A>::compute_base_buffer_items(uint16_t k, uint64_t n) {
858
839
  return n % (static_cast<uint64_t>(2) * k);
859
840
  }
860
841
 
861
842
  template<typename T, typename C, typename A>
862
- uint64_t quantiles_sketch<T, C, A>::compute_bit_pattern(const uint16_t k, const uint64_t n) {
843
+ uint64_t quantiles_sketch<T, C, A>::compute_bit_pattern(uint16_t k, uint64_t n) {
863
844
  return n / (static_cast<uint64_t>(2) * k);
864
845
  }
865
846
 
866
847
  template<typename T, typename C, typename A>
867
- uint32_t quantiles_sketch<T, C, A>::compute_valid_levels(const uint64_t bit_pattern) {
868
- // TODO: Java's Long.bitCount() probably uses a better method
869
- uint64_t bp = bit_pattern;
848
+ uint32_t quantiles_sketch<T, C, A>::count_valid_levels(uint64_t bit_pattern) {
870
849
  uint32_t count = 0;
871
- while (bp > 0) {
872
- if ((bp & 0x01) == 1) ++count;
873
- bp >>= 1;
874
- }
850
+ for (; bit_pattern > 0; ++count) bit_pattern &= bit_pattern - 1;
875
851
  return count;
876
852
  }
877
853
 
878
854
  template<typename T, typename C, typename A>
879
- uint8_t quantiles_sketch<T, C, A>::compute_levels_needed(const uint16_t k, const uint64_t n) {
855
+ uint8_t quantiles_sketch<T, C, A>::compute_levels_needed(uint16_t k, uint64_t n) {
880
856
  return static_cast<uint8_t>(64U) - count_leading_zeros_in_u64(n / (2 * k));
881
857
  }
882
858
 
@@ -961,13 +937,13 @@ void quantiles_sketch<T, C, A>::process_full_base_buffer() {
961
937
  // make sure there will be enough levels for the propagation
962
938
  grow_levels_if_needed(); // note: n_ was already incremented by update() before this
963
939
 
964
- std::sort(base_buffer_.begin(), base_buffer_.end(), C());
940
+ std::sort(base_buffer_.begin(), base_buffer_.end(), comparator_);
965
941
  in_place_propagate_carry(0,
966
942
  levels_[0], // unused here, but 0 is guaranteed to exist
967
943
  base_buffer_,
968
944
  true, *this);
969
945
  base_buffer_.clear();
970
- is_sorted_ = true;
946
+ is_base_buffer_sorted_ = true;
971
947
  if (n_ / (2 * k_) != bit_pattern_) {
972
948
  throw std::logic_error("Internal error: n / 2k (" + std::to_string(n_ / 2 * k_)
973
949
  + " != bit_pattern " + std::to_string(bit_pattern_));
@@ -1019,7 +995,7 @@ void quantiles_sketch<T, C, A>::in_place_propagate_carry(uint8_t starting_level,
1019
995
  merge_two_size_k_buffers(
1020
996
  sketch.levels_[lvl],
1021
997
  sketch.levels_[ending_level],
1022
- buf_size_2k);
998
+ buf_size_2k, sketch.get_comparator());
1023
999
  sketch.levels_[lvl].clear();
1024
1000
  sketch.levels_[ending_level].clear();
1025
1001
  zip_buffer(buf_size_2k, sketch.levels_[ending_level]);
@@ -1071,9 +1047,9 @@ void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf
1071
1047
  // do not clear input buffer
1072
1048
  }
1073
1049
 
1074
-
1075
1050
  template<typename T, typename C, typename A>
1076
- void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& src_2, Level& dst) {
1051
+ void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& src_2,
1052
+ Level& dst, const C& comparator) {
1077
1053
  if (src_1.size() != src_2.size()
1078
1054
  || src_1.size() * 2 != dst.capacity()
1079
1055
  || dst.size() != 0) {
@@ -1085,7 +1061,7 @@ void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& sr
1085
1061
 
1086
1062
  // TODO: probably actually doing copies given Level&?
1087
1063
  while (it1 != end1 && it2 != end2) {
1088
- if (C()(*it1, *it2)) {
1064
+ if (comparator(*it1, *it2)) {
1089
1065
  dst.push_back(std::move(*it1++));
1090
1066
  } else {
1091
1067
  dst.push_back(std::move(*it2++));
@@ -1100,7 +1076,6 @@ void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& sr
1100
1076
  }
1101
1077
  }
1102
1078
 
1103
-
1104
1079
  template<typename T, typename C, typename A>
1105
1080
  template<typename FwdSk>
1106
1081
  void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& src) {
@@ -1149,25 +1124,24 @@ void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& sr
1149
1124
  throw std::logic_error("Failed internal consistency check after standard_merge()");
1150
1125
  }
1151
1126
 
1152
- // update min and max values
1127
+ // update min and max items
1153
1128
  // can't just check is_empty() since min and max might not have been set if
1154
1129
  // there were no base buffer items added via update()
1155
- if (tgt.min_value_ == nullptr) {
1156
- tgt.min_value_ = new (tgt.allocator_.allocate(1)) T(*src.min_value_);
1130
+ if (tgt.min_item_ == nullptr) {
1131
+ tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
1157
1132
  } else {
1158
- if (C()(*src.min_value_, *tgt.min_value_))
1159
- *tgt.min_value_ = conditional_forward<FwdSk>(*src.min_value_);
1133
+ if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
1134
+ *tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
1160
1135
  }
1161
1136
 
1162
- if (tgt.max_value_ == nullptr) {
1163
- tgt.max_value_ = new (tgt.allocator_.allocate(1)) T(*src.max_value_);
1137
+ if (tgt.max_item_ == nullptr) {
1138
+ tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
1164
1139
  } else {
1165
- if (C()(*tgt.max_value_, *src.max_value_))
1166
- *tgt.max_value_ = conditional_forward<FwdSk>(*src.max_value_);
1140
+ if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
1141
+ *tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
1167
1142
  }
1168
1143
  }
1169
1144
 
1170
-
1171
1145
  template<typename T, typename C, typename A>
1172
1146
  template<typename FwdSk>
1173
1147
  void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&& src) {
@@ -1226,25 +1200,24 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
1226
1200
  throw std::logic_error("Failed internal consistency check after downsampling_merge()");
1227
1201
  }
1228
1202
 
1229
- // update min and max values
1203
+ // update min and max items
1230
1204
  // can't just check is_empty() since min and max might not have been set if
1231
1205
  // there were no base buffer items added via update()
1232
- if (tgt.min_value_ == nullptr) {
1233
- tgt.min_value_ = new (tgt.allocator_.allocate(1)) T(*src.min_value_);
1206
+ if (tgt.min_item_ == nullptr) {
1207
+ tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
1234
1208
  } else {
1235
- if (C()(*src.min_value_, *tgt.min_value_))
1236
- *tgt.min_value_ = conditional_forward<FwdSk>(*src.min_value_);
1209
+ if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
1210
+ *tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
1237
1211
  }
1238
1212
 
1239
- if (tgt.max_value_ == nullptr) {
1240
- tgt.max_value_ = new (tgt.allocator_.allocate(1)) T(*src.max_value_);
1213
+ if (tgt.max_item_ == nullptr) {
1214
+ tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
1241
1215
  } else {
1242
- if (C()(*tgt.max_value_, *src.max_value_))
1243
- *tgt.max_value_ = conditional_forward<FwdSk>(*src.max_value_);
1216
+ if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
1217
+ *tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
1244
1218
  }
1245
1219
  }
1246
1220
 
1247
-
1248
1221
  template<typename T, typename C, typename A>
1249
1222
  uint8_t quantiles_sketch<T, C, A>::lowest_zero_bit_starting_at(uint64_t bits, uint8_t starting_bit) {
1250
1223
  uint8_t pos = starting_bit & 0X3F;
@@ -1292,6 +1265,23 @@ class quantiles_sketch<T, C, A>::items_deleter {
1292
1265
  size_t num_;
1293
1266
  };
1294
1267
 
1268
+ template<typename T, typename C, typename A>
1269
+ void quantiles_sketch<T, C, A>::setup_sorted_view() const {
1270
+ if (sorted_view_ == nullptr) {
1271
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
1272
+ sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
1273
+ }
1274
+ }
1275
+
1276
+ template<typename T, typename C, typename A>
1277
+ void quantiles_sketch<T, C, A>::reset_sorted_view() {
1278
+ if (sorted_view_ != nullptr) {
1279
+ sorted_view_->~quantiles_sorted_view();
1280
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
1281
+ AllocSortedView(allocator_).deallocate(sorted_view_, 1);
1282
+ sorted_view_ = nullptr;
1283
+ }
1284
+ }
1295
1285
 
1296
1286
  // quantiles_sketch::const_iterator implementation
1297
1287
 
@@ -1364,8 +1354,13 @@ bool quantiles_sketch<T, C, A>::const_iterator::operator!=(const const_iterator&
1364
1354
  }
1365
1355
 
1366
1356
  template<typename T, typename C, typename A>
1367
- std::pair<const T&, const uint64_t> quantiles_sketch<T, C, A>::const_iterator::operator*() const {
1368
- return std::pair<const T&, const uint64_t>(level_ == -1 ? base_buffer_[index_] : levels_[level_][index_], weight_);
1357
+ auto quantiles_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
1358
+ return value_type(level_ == -1 ? base_buffer_[index_] : levels_[level_][index_], weight_);
1359
+ }
1360
+
1361
+ template<typename T, typename C, typename A>
1362
+ auto quantiles_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
1363
+ return **this;
1369
1364
  }
1370
1365
 
1371
1366
  } /* namespace datasketches */