datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -26,24 +26,24 @@
26
26
  #include <iomanip>
27
27
  #include <sstream>
28
28
 
29
- #include "common_defs.hpp"
30
29
  #include "count_zeros.hpp"
31
30
  #include "conditional_forward.hpp"
32
- #include "quantiles_sketch.hpp"
33
31
 
34
32
  namespace datasketches {
35
33
 
36
34
  template<typename T, typename C, typename A>
37
- quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, const A& allocator):
35
+ quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, const C& comparator, const A& allocator):
36
+ comparator_(comparator),
38
37
  allocator_(allocator),
38
+ is_base_buffer_sorted_(true),
39
39
  k_(k),
40
40
  n_(0),
41
41
  bit_pattern_(0),
42
42
  base_buffer_(allocator_),
43
43
  levels_(allocator_),
44
- min_value_(nullptr),
45
- max_value_(nullptr),
46
- is_sorted_(true)
44
+ min_item_(nullptr),
45
+ max_item_(nullptr),
46
+ sorted_view_(nullptr)
47
47
  {
48
48
  check_k(k_);
49
49
  base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k));
@@ -51,18 +51,20 @@ is_sorted_(true)
51
51
 
52
52
  template<typename T, typename C, typename A>
53
53
  quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch& other):
54
+ comparator_(other.comparator_),
54
55
  allocator_(other.allocator_),
56
+ is_base_buffer_sorted_(other.is_base_buffer_sorted_),
55
57
  k_(other.k_),
56
58
  n_(other.n_),
57
59
  bit_pattern_(other.bit_pattern_),
58
60
  base_buffer_(other.base_buffer_),
59
61
  levels_(other.levels_),
60
- min_value_(nullptr),
61
- max_value_(nullptr),
62
- is_sorted_(other.is_sorted_)
62
+ min_item_(nullptr),
63
+ max_item_(nullptr),
64
+ sorted_view_(nullptr)
63
65
  {
64
- if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
65
- if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
66
+ if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
67
+ if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
66
68
  for (size_t i = 0; i < levels_.size(); ++i) {
67
69
  if (levels_[i].capacity() != other.levels_[i].capacity()) {
68
70
  levels_[i].reserve(other.levels_[i].capacity());
@@ -72,63 +74,71 @@ is_sorted_(other.is_sorted_)
72
74
 
73
75
  template<typename T, typename C, typename A>
74
76
  quantiles_sketch<T, C, A>::quantiles_sketch(quantiles_sketch&& other) noexcept:
77
+ comparator_(other.comparator_),
75
78
  allocator_(other.allocator_),
79
+ is_base_buffer_sorted_(other.is_base_buffer_sorted_),
76
80
  k_(other.k_),
77
81
  n_(other.n_),
78
82
  bit_pattern_(other.bit_pattern_),
79
83
  base_buffer_(std::move(other.base_buffer_)),
80
84
  levels_(std::move(other.levels_)),
81
- min_value_(other.min_value_),
82
- max_value_(other.max_value_),
83
- is_sorted_(other.is_sorted_)
85
+ min_item_(other.min_item_),
86
+ max_item_(other.max_item_),
87
+ sorted_view_(nullptr)
84
88
  {
85
- other.min_value_ = nullptr;
86
- other.max_value_ = nullptr;
89
+ other.min_item_ = nullptr;
90
+ other.max_item_ = nullptr;
87
91
  }
88
92
 
89
93
  template<typename T, typename C, typename A>
90
94
  quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(const quantiles_sketch& other) {
91
95
  quantiles_sketch<T, C, A> copy(other);
96
+ std::swap(comparator_, copy.comparator_);
92
97
  std::swap(allocator_, copy.allocator_);
98
+ std::swap(is_base_buffer_sorted_, copy.is_base_buffer_sorted_);
93
99
  std::swap(k_, copy.k_);
94
100
  std::swap(n_, copy.n_);
95
101
  std::swap(bit_pattern_, copy.bit_pattern_);
96
102
  std::swap(base_buffer_, copy.base_buffer_);
97
103
  std::swap(levels_, copy.levels_);
98
- std::swap(min_value_, copy.min_value_);
99
- std::swap(max_value_, copy.max_value_);
100
- std::swap(is_sorted_, copy.is_sorted_);
104
+ std::swap(min_item_, copy.min_item_);
105
+ std::swap(max_item_, copy.max_item_);
106
+ reset_sorted_view();
101
107
  return *this;
102
108
  }
103
109
 
104
110
  template<typename T, typename C, typename A>
105
111
  quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(quantiles_sketch&& other) noexcept {
112
+ std::swap(comparator_, other.comparator_);
106
113
  std::swap(allocator_, other.allocator_);
114
+ std::swap(is_base_buffer_sorted_, other.is_base_buffer_sorted_);
107
115
  std::swap(k_, other.k_);
108
116
  std::swap(n_, other.n_);
109
117
  std::swap(bit_pattern_, other.bit_pattern_);
110
118
  std::swap(base_buffer_, other.base_buffer_);
111
119
  std::swap(levels_, other.levels_);
112
- std::swap(min_value_, other.min_value_);
113
- std::swap(max_value_, other.max_value_);
114
- std::swap(is_sorted_, other.is_sorted_);
120
+ std::swap(min_item_, other.min_item_);
121
+ std::swap(max_item_, other.max_item_);
122
+ reset_sorted_view();
115
123
  return *this;
116
124
  }
117
125
 
118
126
  template<typename T, typename C, typename A>
119
127
  quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
120
128
  Level&& base_buffer, VectorLevels&& levels,
121
- std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value,
122
- bool is_sorted, const A& allocator) :
129
+ std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
130
+ bool is_sorted, const C& comparator, const A& allocator):
131
+ comparator_(comparator),
123
132
  allocator_(allocator),
133
+ is_base_buffer_sorted_(is_sorted),
124
134
  k_(k),
125
135
  n_(n),
126
136
  bit_pattern_(bit_pattern),
127
137
  base_buffer_(std::move(base_buffer)),
128
138
  levels_(std::move(levels)),
129
- min_value_(min_value.release()),
130
- max_value_(max_value.release()),
131
- is_sorted_(is_sorted)
139
+ min_item_(min_item.release()),
140
+ max_item_(max_item.release()),
141
+ sorted_view_(nullptr)
132
142
  {
133
143
  uint32_t item_count = base_buffer_.size();
134
144
  for (Level& lvl : levels_) {
@@ -140,16 +150,19 @@ is_sorted_(is_sorted)
140
150
 
141
151
  template<typename T, typename C, typename A>
142
152
  template<typename From, typename FC, typename FA>
143
- quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const A& allocator) :
153
+ quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other,
154
+ const C& comparator, const A& allocator):
155
+ comparator_(comparator),
144
156
  allocator_(allocator),
157
+ is_base_buffer_sorted_(false),
145
158
  k_(other.get_k()),
146
159
  n_(other.get_n()),
147
160
  bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
148
161
  base_buffer_(allocator),
149
162
  levels_(allocator),
150
- min_value_(nullptr),
151
- max_value_(nullptr),
152
- is_sorted_(false)
163
+ min_item_(nullptr),
164
+ max_item_(nullptr),
165
+ sorted_view_(nullptr)
153
166
  {
154
167
  static_assert(std::is_constructible<T, From>::value,
155
168
  "Type converting constructor requires new type to be constructible from existing type");
@@ -157,8 +170,8 @@ is_sorted_(false)
157
170
  base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
158
171
 
159
172
  if (!other.is_empty()) {
160
- min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
161
- max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
173
+ min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
174
+ max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
162
175
 
163
176
  // reserve space in levels
164
177
  const uint8_t num_levels = compute_levels_needed(k_, n_);
@@ -189,7 +202,7 @@ is_sorted_(false)
189
202
  // validate that ordering within each level is preserved
190
203
  // base_buffer_ can be considered unsorted for this purpose
191
204
  for (int i = 0; i < num_levels; ++i) {
192
- if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), C())) {
205
+ if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), comparator_)) {
193
206
  throw std::logic_error("Copy construction across types produces invalid sorting");
194
207
  }
195
208
  }
@@ -199,40 +212,38 @@ is_sorted_(false)
199
212
 
200
213
  template<typename T, typename C, typename A>
201
214
  quantiles_sketch<T, C, A>::~quantiles_sketch() {
202
- if (min_value_ != nullptr) {
203
- min_value_->~T();
204
- allocator_.deallocate(min_value_, 1);
215
+ if (min_item_ != nullptr) {
216
+ min_item_->~T();
217
+ allocator_.deallocate(min_item_, 1);
205
218
  }
206
- if (max_value_ != nullptr) {
207
- max_value_->~T();
208
- allocator_.deallocate(max_value_, 1);
219
+ if (max_item_ != nullptr) {
220
+ max_item_->~T();
221
+ allocator_.deallocate(max_item_, 1);
209
222
  }
223
+ reset_sorted_view();
210
224
  }
211
225
 
212
226
  template<typename T, typename C, typename A>
213
227
  template<typename FwdT>
214
228
  void quantiles_sketch<T, C, A>::update(FwdT&& item) {
215
- if (!check_update_value(item)) { return; }
229
+ if (!check_update_item(item)) { return; }
216
230
  if (is_empty()) {
217
- min_value_ = new (allocator_.allocate(1)) T(item);
218
- max_value_ = new (allocator_.allocate(1)) T(item);
231
+ min_item_ = new (allocator_.allocate(1)) T(item);
232
+ max_item_ = new (allocator_.allocate(1)) T(item);
219
233
  } else {
220
- if (C()(item, *min_value_)) *min_value_ = item;
221
- if (C()(*max_value_, item)) *max_value_ = item;
234
+ if (comparator_(item, *min_item_)) *min_item_ = item;
235
+ if (comparator_(*max_item_, item)) *max_item_ = item;
222
236
  }
223
237
 
224
238
  // if exceed capacity, grow until size 2k -- assumes eager processing
225
- if (base_buffer_.size() + 1 > base_buffer_.capacity())
226
- grow_base_buffer();
239
+ if (base_buffer_.size() + 1 > base_buffer_.capacity()) grow_base_buffer();
227
240
 
228
241
  base_buffer_.push_back(std::forward<FwdT>(item));
229
242
  ++n_;
230
243
 
231
- if (base_buffer_.size() > 1)
232
- is_sorted_ = false;
233
-
234
- if (base_buffer_.size() == 2 * k_)
235
- process_full_base_buffer();
244
+ if (base_buffer_.size() > 1) is_base_buffer_sorted_ = false;
245
+ if (base_buffer_.size() == 2 * k_) process_full_base_buffer();
246
+ reset_sorted_view();
236
247
  }
237
248
 
238
249
  template<typename T, typename C, typename A>
@@ -245,10 +256,11 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
245
256
  for (auto item : other.base_buffer_) {
246
257
  update(conditional_forward<FwdSk>(item));
247
258
  }
248
- return; // we're done
259
+ reset_sorted_view();
260
+ return;
249
261
  }
250
262
 
251
- // we know other has data and is in estimation mode
263
+ // other has data and is in estimation mode
252
264
  if (is_estimation_mode()) {
253
265
  if (k_ == other.get_k()) {
254
266
  standard_merge(*this, other);
@@ -273,6 +285,7 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
273
285
  }
274
286
  *this = sk_copy;
275
287
  }
288
+ reset_sorted_view();
276
289
  }
277
290
 
278
291
  template<typename T, typename C, typename A>
@@ -286,8 +299,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
286
299
  write(os, family);
287
300
 
288
301
  // side-effect: sort base buffer since always compact
289
- // can't set is_sorted_ since const method
290
- std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
302
+ std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
303
+ const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
291
304
 
292
305
  // empty, ordered, compact are valid flags
293
306
  const uint8_t flags_byte(
@@ -304,8 +317,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
304
317
  write(os, n_);
305
318
 
306
319
  // min and max
307
- serde.serialize(os, min_value_, 1);
308
- serde.serialize(os, max_value_, 1);
320
+ serde.serialize(os, min_item_, 1);
321
+ serde.serialize(os, max_item_, 1);
309
322
 
310
323
  // base buffer items
311
324
  serde.serialize(os, base_buffer_.data(), static_cast<unsigned>(base_buffer_.size()));
@@ -334,8 +347,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
334
347
  ptr += copy_to_mem(family, ptr);
335
348
 
336
349
  // side-effect: sort base buffer since always compact
337
- // can't set is_sorted_ since const method
338
- std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
350
+ std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
351
+ const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
339
352
 
340
353
  // empty, ordered, compact are valid flags
341
354
  const uint8_t flags_byte(
@@ -352,8 +365,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
352
365
  ptr += copy_to_mem(n_, ptr);
353
366
 
354
367
  // min and max
355
- ptr += serde.serialize(ptr, end_ptr - ptr, min_value_, 1);
356
- ptr += serde.serialize(ptr, end_ptr - ptr, max_value_, 1);
368
+ ptr += serde.serialize(ptr, end_ptr - ptr, min_item_, 1);
369
+ ptr += serde.serialize(ptr, end_ptr - ptr, max_item_, 1);
357
370
 
358
371
  // base buffer items
359
372
  if (base_buffer_.size() > 0)
@@ -371,7 +384,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
371
384
 
372
385
  template<typename T, typename C, typename A>
373
386
  template<typename SerDe>
374
- auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde, const A &allocator) -> quantiles_sketch {
387
+ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde,
388
+ const C& comparator, const A &allocator) -> quantiles_sketch {
375
389
  const auto preamble_longs = read<uint8_t>(is);
376
390
  const auto serial_version = read<uint8_t>(is);
377
391
  const auto family_id = read<uint8_t>(is);
@@ -387,7 +401,7 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
387
401
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
388
402
  const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
389
403
  if (is_empty) {
390
- return quantiles_sketch(k, allocator);
404
+ return quantiles_sketch(k, comparator, allocator);
391
405
  }
392
406
 
393
407
  const auto items_seen = read<uint64_t>(is);
@@ -397,17 +411,17 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
397
411
 
398
412
  A alloc(allocator);
399
413
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
400
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
401
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
402
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
403
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
414
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
415
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
416
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
417
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
404
418
 
405
- serde.deserialize(is, min_value_buffer.get(), 1);
419
+ serde.deserialize(is, min_item_buffer.get(), 1);
406
420
  // serde call did not throw, repackage with destrtuctor
407
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
408
- serde.deserialize(is, max_value_buffer.get(), 1);
421
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
422
+ serde.deserialize(is, max_item_buffer.get(), 1);
409
423
  // serde call did not throw, repackage with destrtuctor
410
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
424
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
411
425
 
412
426
  if (serial_version == 1) {
413
427
  read<uint64_t>(is); // no longer used
@@ -449,7 +463,8 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
449
463
  }
450
464
 
451
465
  return quantiles_sketch(k, items_seen, bit_pattern,
452
- std::move(base_buffer), std::move(levels), std::move(min_value), std::move(max_value), is_sorted, allocator);
466
+ std::move(base_buffer), std::move(levels), std::move(min_item), std::move(max_item), is_sorted,
467
+ comparator, allocator);
453
468
  }
454
469
 
455
470
  template<typename T, typename C, typename A>
@@ -473,7 +488,8 @@ auto quantiles_sketch<T, C, A>::deserialize_array(std::istream& is, uint32_t num
473
488
 
474
489
  template<typename T, typename C, typename A>
475
490
  template<typename SerDe>
476
- auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& serde, const A &allocator) -> quantiles_sketch {
491
+ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& serde,
492
+ const C& comparator, const A &allocator) -> quantiles_sketch {
477
493
  ensure_minimum_memory(size, 8);
478
494
  const char* ptr = static_cast<const char*>(bytes);
479
495
  const char* end_ptr = static_cast<const char*>(bytes) + size;
@@ -498,7 +514,7 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
498
514
 
499
515
  const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
500
516
  if (is_empty) {
501
- return quantiles_sketch(k, allocator);
517
+ return quantiles_sketch(k, comparator, allocator);
502
518
  }
503
519
 
504
520
  ensure_minimum_memory(size, 16);
@@ -510,17 +526,17 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
510
526
 
511
527
  A alloc(allocator);
512
528
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
513
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
514
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
515
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
516
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
529
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
530
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
531
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
532
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
517
533
 
518
- ptr += serde.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
534
+ ptr += serde.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
519
535
  // serde call did not throw, repackage with destrtuctor
520
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
521
- ptr += serde.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
536
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
537
+ ptr += serde.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
522
538
  // serde call did not throw, repackage with destrtuctor
523
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
539
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
524
540
 
525
541
  if (serial_version == 1) {
526
542
  uint64_t unused_long;
@@ -567,7 +583,8 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
567
583
  }
568
584
 
569
585
  return quantiles_sketch(k, items_seen, bit_pattern,
570
- std::move(base_buffer_pair.first), std::move(levels), std::move(min_value), std::move(max_value), is_sorted, allocator);
586
+ std::move(base_buffer_pair.first), std::move(levels), std::move(min_item), std::move(max_item), is_sorted,
587
+ comparator, allocator);
571
588
  }
572
589
 
573
590
  template<typename T, typename C, typename A>
@@ -605,11 +622,11 @@ string<A> quantiles_sketch<T, C, A>::to_string(bool print_levels, bool print_ite
605
622
  os << " Empty : " << (is_empty() ? "true" : "false") << std::endl;
606
623
  os << " Estimation mode: " << (is_estimation_mode() ? "true" : "false") << std::endl;
607
624
  os << " Levels (w/o BB): " << levels_.size() << std::endl;
608
- os << " Used Levels : " << compute_valid_levels(bit_pattern_) << std::endl;
625
+ os << " Used Levels : " << count_valid_levels(bit_pattern_) << std::endl;
609
626
  os << " Retained items : " << get_num_retained() << std::endl;
610
627
  if (!is_empty()) {
611
- os << " Min value : " << *min_value_ << std::endl;
612
- os << " Max value : " << *max_value_ << std::endl;
628
+ os << " Min item : " << *min_item_ << std::endl;
629
+ os << " Max item : " << *max_item_ << std::endl;
613
630
  }
614
631
  os << "### End sketch summary" << std::endl;
615
632
 
@@ -667,20 +684,20 @@ uint32_t quantiles_sketch<T, C, A>::get_num_retained() const {
667
684
  }
668
685
 
669
686
  template<typename T, typename C, typename A>
670
- const T& quantiles_sketch<T, C, A>::get_min_value() const {
671
- if (is_empty()) return get_invalid_value();
672
- return *min_value_;
687
+ const T& quantiles_sketch<T, C, A>::get_min_item() const {
688
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
689
+ return *min_item_;
673
690
  }
674
691
 
675
692
  template<typename T, typename C, typename A>
676
- const T& quantiles_sketch<T, C, A>::get_max_value() const {
677
- if (is_empty()) return get_invalid_value();
678
- return *max_value_;
693
+ const T& quantiles_sketch<T, C, A>::get_max_item() const {
694
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
695
+ return *max_item_;
679
696
  }
680
697
 
681
698
  template<typename T, typename C, typename A>
682
699
  C quantiles_sketch<T, C, A>::get_comparator() const {
683
- return C();
700
+ return comparator_;
684
701
  }
685
702
 
686
703
  template<typename T, typename C, typename A>
@@ -702,8 +719,8 @@ template<typename SerDe, typename TT, typename std::enable_if<!std::is_arithmeti
702
719
  size_t quantiles_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& serde) const {
703
720
  if (is_empty()) { return EMPTY_SIZE_BYTES; }
704
721
  size_t size = DATA_START;
705
- size += serde.size_of_item(*min_value_);
706
- size += serde.size_of_item(*max_value_);
722
+ size += serde.size_of_item(*min_item_);
723
+ size += serde.size_of_item(*max_item_);
707
724
  for (auto it: *this) size += serde.size_of_item(it.first);
708
725
  return size;
709
726
  }
@@ -721,162 +738,121 @@ double quantiles_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool is_
721
738
  }
722
739
 
723
740
  template<typename T, typename C, typename A>
724
- template<bool inclusive>
725
- quantile_sketch_sorted_view<T, C, A> quantiles_sketch<T, C, A>::get_sorted_view(bool cumulative) const {
726
- // allow side-effect of sorting the base buffer; can't set the flag since
727
- // this is a const method
728
- if (!is_sorted_) {
729
- std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
741
+ quantiles_sorted_view<T, C, A> quantiles_sketch<T, C, A>::get_sorted_view() const {
742
+ // allow side-effect of sorting the base buffer
743
+ if (!is_base_buffer_sorted_) {
744
+ std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
745
+ const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
730
746
  }
731
- quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
747
+ quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
732
748
 
733
749
  uint64_t weight = 1;
734
750
  view.add(base_buffer_.begin(), base_buffer_.end(), weight);
735
- for (auto& level : levels_) {
751
+ for (const auto& level: levels_) {
736
752
  weight <<= 1;
737
753
  if (level.empty()) { continue; }
738
754
  view.add(level.begin(), level.end(), weight);
739
755
  }
740
756
 
741
- if (cumulative) view.template convert_to_cummulative<inclusive>();
757
+ view.convert_to_cummulative();
742
758
  return view;
743
759
  }
744
760
 
745
761
  template<typename T, typename C, typename A>
746
- template<bool inclusive>
747
- auto quantiles_sketch<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
748
- if (is_empty()) return get_invalid_value();
749
- if (rank == 0.0) return *min_value_;
750
- if (rank == 1.0) return *max_value_;
762
+ auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
763
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
751
764
  if ((rank < 0.0) || (rank > 1.0)) {
752
- throw std::invalid_argument("Rank cannot be less than zero or greater than 1.0");
765
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
753
766
  }
754
767
  // possible side-effect: sorting base buffer
755
- return get_sorted_view<inclusive>(true).get_quantile(rank);
768
+ setup_sorted_view();
769
+ return sorted_view_->get_quantile(rank, inclusive);
756
770
  }
757
771
 
758
772
  template<typename T, typename C, typename A>
759
- template<bool inclusive>
760
- std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size) const {
773
+ std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
774
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
761
775
  std::vector<T, A> quantiles(allocator_);
762
- if (is_empty()) return quantiles;
763
776
  quantiles.reserve(size);
764
777
 
765
778
  // possible side-effect: sorting base buffer
766
- auto view = get_sorted_view<inclusive>(true);
779
+ setup_sorted_view();
767
780
 
768
781
  for (uint32_t i = 0; i < size; ++i) {
769
782
  const double rank = ranks[i];
770
783
  if ((rank < 0.0) || (rank > 1.0)) {
771
- throw std::invalid_argument("rank cannot be less than zero or greater than 1.0");
772
- }
773
- if (rank == 0.0) quantiles.push_back(*min_value_);
774
- else if (rank == 1.0) quantiles.push_back(*max_value_);
775
- else {
776
- quantiles.push_back(view.get_quantile(rank));
784
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
777
785
  }
786
+ quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
778
787
  }
779
788
  return quantiles;
780
789
  }
781
790
 
782
791
  template<typename T, typename C, typename A>
783
- template<bool inclusive>
784
- std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num) const {
785
- if (is_empty()) return std::vector<T, A>(allocator_);
792
+ std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
793
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
786
794
  if (num == 0) {
787
795
  throw std::invalid_argument("num must be > 0");
788
796
  }
789
- vector_double fractions(num, 0, allocator_);
790
- fractions[0] = 0.0;
797
+ vector_double ranks(num, 0, allocator_);
798
+ ranks[0] = 0.0;
791
799
  for (size_t i = 1; i < num; i++) {
792
- fractions[i] = static_cast<double>(i) / (num - 1);
800
+ ranks[i] = static_cast<double>(i) / (num - 1);
793
801
  }
794
802
  if (num > 1) {
795
- fractions[num - 1] = 1.0;
803
+ ranks[num - 1] = 1.0;
796
804
  }
797
- return get_quantiles<inclusive>(fractions.data(), num);
805
+ return get_quantiles(ranks.data(), num, inclusive);
798
806
  }
799
807
 
800
808
  template<typename T, typename C, typename A>
801
- template<bool inclusive>
802
- double quantiles_sketch<T, C, A>::get_rank(const T& value) const {
803
- if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
804
- uint64_t weight = 1;
805
- uint64_t total = 0;
806
- for (const T &item: base_buffer_) {
807
- if (inclusive ? !C()(value, item) : C()(item, value))
808
- total += weight;
809
- }
810
-
811
- weight *= 2;
812
- for (uint8_t level = 0; level < levels_.size(); ++level, weight *= 2) {
813
- if (levels_[level].empty()) { continue; }
814
- const T* data = levels_[level].data();
815
- for (uint16_t i = 0; i < k_; ++i) {
816
- if (inclusive ? !C()(value, data[i]) : C()(data[i], value))
817
- total += weight;
818
- else
819
- break; // levels are sorted, no point comparing further
820
- }
821
- }
822
- return (double) total / n_;
809
+ double quantiles_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
810
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
811
+ setup_sorted_view();
812
+ return sorted_view_->get_rank(item, inclusive);
823
813
  }
824
814
 
825
815
  template<typename T, typename C, typename A>
826
- template<bool inclusive>
827
- auto quantiles_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size) const -> vector_double {
828
- auto buckets = get_CDF<inclusive>(split_points, size);
829
- if (is_empty()) return buckets;
830
- for (uint32_t i = size; i > 0; --i) {
831
- buckets[i] -= buckets[i - 1];
832
- }
833
- return buckets;
816
+ auto quantiles_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
817
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
818
+ setup_sorted_view();
819
+ return sorted_view_->get_PMF(split_points, size, inclusive);
834
820
  }
835
821
 
836
822
  template<typename T, typename C, typename A>
837
- template<bool inclusive>
838
- auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size) const -> vector_double {
839
- vector_double buckets(allocator_);
840
- if (is_empty()) return buckets;
841
- check_split_points(split_points, size);
842
- buckets.reserve(size + 1);
843
- for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank<inclusive>(split_points[i]));
844
- buckets.push_back(1);
845
- return buckets;
823
+ auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
824
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
825
+ setup_sorted_view();
826
+ return sorted_view_->get_CDF(split_points, size, inclusive);
846
827
  }
847
828
 
848
829
  template<typename T, typename C, typename A>
849
- uint32_t quantiles_sketch<T, C, A>::compute_retained_items(const uint16_t k, const uint64_t n) {
830
+ uint32_t quantiles_sketch<T, C, A>::compute_retained_items(uint16_t k, uint64_t n) {
850
831
  const uint32_t bb_count = compute_base_buffer_items(k, n);
851
832
  const uint64_t bit_pattern = compute_bit_pattern(k, n);
852
- const uint32_t valid_levels = compute_valid_levels(bit_pattern);
833
+ const uint32_t valid_levels = count_valid_levels(bit_pattern);
853
834
  return bb_count + (k * valid_levels);
854
835
  }
855
836
 
856
837
  template<typename T, typename C, typename A>
857
- uint32_t quantiles_sketch<T, C, A>::compute_base_buffer_items(const uint16_t k, const uint64_t n) {
838
+ uint32_t quantiles_sketch<T, C, A>::compute_base_buffer_items(uint16_t k, uint64_t n) {
858
839
  return n % (static_cast<uint64_t>(2) * k);
859
840
  }
860
841
 
861
842
  template<typename T, typename C, typename A>
862
- uint64_t quantiles_sketch<T, C, A>::compute_bit_pattern(const uint16_t k, const uint64_t n) {
843
+ uint64_t quantiles_sketch<T, C, A>::compute_bit_pattern(uint16_t k, uint64_t n) {
863
844
  return n / (static_cast<uint64_t>(2) * k);
864
845
  }
865
846
 
866
847
  template<typename T, typename C, typename A>
867
- uint32_t quantiles_sketch<T, C, A>::compute_valid_levels(const uint64_t bit_pattern) {
868
- // TODO: Java's Long.bitCount() probably uses a better method
869
- uint64_t bp = bit_pattern;
848
+ uint32_t quantiles_sketch<T, C, A>::count_valid_levels(uint64_t bit_pattern) {
870
849
  uint32_t count = 0;
871
- while (bp > 0) {
872
- if ((bp & 0x01) == 1) ++count;
873
- bp >>= 1;
874
- }
850
+ for (; bit_pattern > 0; ++count) bit_pattern &= bit_pattern - 1;
875
851
  return count;
876
852
  }
877
853
 
878
854
  template<typename T, typename C, typename A>
879
- uint8_t quantiles_sketch<T, C, A>::compute_levels_needed(const uint16_t k, const uint64_t n) {
855
+ uint8_t quantiles_sketch<T, C, A>::compute_levels_needed(uint16_t k, uint64_t n) {
880
856
  return static_cast<uint8_t>(64U) - count_leading_zeros_in_u64(n / (2 * k));
881
857
  }
882
858
 
@@ -961,13 +937,13 @@ void quantiles_sketch<T, C, A>::process_full_base_buffer() {
961
937
  // make sure there will be enough levels for the propagation
962
938
  grow_levels_if_needed(); // note: n_ was already incremented by update() before this
963
939
 
964
- std::sort(base_buffer_.begin(), base_buffer_.end(), C());
940
+ std::sort(base_buffer_.begin(), base_buffer_.end(), comparator_);
965
941
  in_place_propagate_carry(0,
966
942
  levels_[0], // unused here, but 0 is guaranteed to exist
967
943
  base_buffer_,
968
944
  true, *this);
969
945
  base_buffer_.clear();
970
- is_sorted_ = true;
946
+ is_base_buffer_sorted_ = true;
971
947
  if (n_ / (2 * k_) != bit_pattern_) {
972
948
  throw std::logic_error("Internal error: n / 2k (" + std::to_string(n_ / 2 * k_)
973
949
  + " != bit_pattern " + std::to_string(bit_pattern_));
@@ -1019,7 +995,7 @@ void quantiles_sketch<T, C, A>::in_place_propagate_carry(uint8_t starting_level,
1019
995
  merge_two_size_k_buffers(
1020
996
  sketch.levels_[lvl],
1021
997
  sketch.levels_[ending_level],
1022
- buf_size_2k);
998
+ buf_size_2k, sketch.get_comparator());
1023
999
  sketch.levels_[lvl].clear();
1024
1000
  sketch.levels_[ending_level].clear();
1025
1001
  zip_buffer(buf_size_2k, sketch.levels_[ending_level]);
@@ -1071,9 +1047,9 @@ void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf
1071
1047
  // do not clear input buffer
1072
1048
  }
1073
1049
 
1074
-
1075
1050
  template<typename T, typename C, typename A>
1076
- void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& src_2, Level& dst) {
1051
+ void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& src_2,
1052
+ Level& dst, const C& comparator) {
1077
1053
  if (src_1.size() != src_2.size()
1078
1054
  || src_1.size() * 2 != dst.capacity()
1079
1055
  || dst.size() != 0) {
@@ -1085,7 +1061,7 @@ void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& sr
1085
1061
 
1086
1062
  // TODO: probably actually doing copies given Level&?
1087
1063
  while (it1 != end1 && it2 != end2) {
1088
- if (C()(*it1, *it2)) {
1064
+ if (comparator(*it1, *it2)) {
1089
1065
  dst.push_back(std::move(*it1++));
1090
1066
  } else {
1091
1067
  dst.push_back(std::move(*it2++));
@@ -1100,7 +1076,6 @@ void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& sr
1100
1076
  }
1101
1077
  }
1102
1078
 
1103
-
1104
1079
  template<typename T, typename C, typename A>
1105
1080
  template<typename FwdSk>
1106
1081
  void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& src) {
@@ -1149,25 +1124,24 @@ void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& sr
1149
1124
  throw std::logic_error("Failed internal consistency check after standard_merge()");
1150
1125
  }
1151
1126
 
1152
- // update min and max values
1127
+ // update min and max items
1153
1128
  // can't just check is_empty() since min and max might not have been set if
1154
1129
  // there were no base buffer items added via update()
1155
- if (tgt.min_value_ == nullptr) {
1156
- tgt.min_value_ = new (tgt.allocator_.allocate(1)) T(*src.min_value_);
1130
+ if (tgt.min_item_ == nullptr) {
1131
+ tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
1157
1132
  } else {
1158
- if (C()(*src.min_value_, *tgt.min_value_))
1159
- *tgt.min_value_ = conditional_forward<FwdSk>(*src.min_value_);
1133
+ if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
1134
+ *tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
1160
1135
  }
1161
1136
 
1162
- if (tgt.max_value_ == nullptr) {
1163
- tgt.max_value_ = new (tgt.allocator_.allocate(1)) T(*src.max_value_);
1137
+ if (tgt.max_item_ == nullptr) {
1138
+ tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
1164
1139
  } else {
1165
- if (C()(*tgt.max_value_, *src.max_value_))
1166
- *tgt.max_value_ = conditional_forward<FwdSk>(*src.max_value_);
1140
+ if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
1141
+ *tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
1167
1142
  }
1168
1143
  }
1169
1144
 
1170
-
1171
1145
  template<typename T, typename C, typename A>
1172
1146
  template<typename FwdSk>
1173
1147
  void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&& src) {
@@ -1226,25 +1200,24 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
1226
1200
  throw std::logic_error("Failed internal consistency check after downsampling_merge()");
1227
1201
  }
1228
1202
 
1229
- // update min and max values
1203
+ // update min and max items
1230
1204
  // can't just check is_empty() since min and max might not have been set if
1231
1205
  // there were no base buffer items added via update()
1232
- if (tgt.min_value_ == nullptr) {
1233
- tgt.min_value_ = new (tgt.allocator_.allocate(1)) T(*src.min_value_);
1206
+ if (tgt.min_item_ == nullptr) {
1207
+ tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
1234
1208
  } else {
1235
- if (C()(*src.min_value_, *tgt.min_value_))
1236
- *tgt.min_value_ = conditional_forward<FwdSk>(*src.min_value_);
1209
+ if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
1210
+ *tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
1237
1211
  }
1238
1212
 
1239
- if (tgt.max_value_ == nullptr) {
1240
- tgt.max_value_ = new (tgt.allocator_.allocate(1)) T(*src.max_value_);
1213
+ if (tgt.max_item_ == nullptr) {
1214
+ tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
1241
1215
  } else {
1242
- if (C()(*tgt.max_value_, *src.max_value_))
1243
- *tgt.max_value_ = conditional_forward<FwdSk>(*src.max_value_);
1216
+ if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
1217
+ *tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
1244
1218
  }
1245
1219
  }
1246
1220
 
1247
-
1248
1221
  template<typename T, typename C, typename A>
1249
1222
  uint8_t quantiles_sketch<T, C, A>::lowest_zero_bit_starting_at(uint64_t bits, uint8_t starting_bit) {
1250
1223
  uint8_t pos = starting_bit & 0X3F;
@@ -1292,6 +1265,23 @@ class quantiles_sketch<T, C, A>::items_deleter {
1292
1265
  size_t num_;
1293
1266
  };
1294
1267
 
1268
+ template<typename T, typename C, typename A>
1269
+ void quantiles_sketch<T, C, A>::setup_sorted_view() const {
1270
+ if (sorted_view_ == nullptr) {
1271
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
1272
+ sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
1273
+ }
1274
+ }
1275
+
1276
+ template<typename T, typename C, typename A>
1277
+ void quantiles_sketch<T, C, A>::reset_sorted_view() {
1278
+ if (sorted_view_ != nullptr) {
1279
+ sorted_view_->~quantiles_sorted_view();
1280
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
1281
+ AllocSortedView(allocator_).deallocate(sorted_view_, 1);
1282
+ sorted_view_ = nullptr;
1283
+ }
1284
+ }
1295
1285
 
1296
1286
  // quantiles_sketch::const_iterator implementation
1297
1287
 
@@ -1364,8 +1354,13 @@ bool quantiles_sketch<T, C, A>::const_iterator::operator!=(const const_iterator&
1364
1354
  }
1365
1355
 
1366
1356
  template<typename T, typename C, typename A>
1367
- std::pair<const T&, const uint64_t> quantiles_sketch<T, C, A>::const_iterator::operator*() const {
1368
- return std::pair<const T&, const uint64_t>(level_ == -1 ? base_buffer_[index_] : levels_[level_][index_], weight_);
1357
+ auto quantiles_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
1358
+ return value_type(level_ == -1 ? base_buffer_[index_] : levels_[level_][index_], weight_);
1359
+ }
1360
+
1361
+ template<typename T, typename C, typename A>
1362
+ auto quantiles_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
1363
+ return **this;
1369
1364
  }
1370
1365
 
1371
1366
  } /* namespace datasketches */