datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -25,8 +25,9 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
- template<typename T, typename C, typename S, typename A>
29
- req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, const A& allocator):
28
+ template<typename T, typename C, typename A>
29
+ req_sketch<T, C, A>::req_sketch(uint16_t k, bool hra, const C& comparator, const A& allocator):
30
+ comparator_(comparator),
30
31
  allocator_(allocator),
31
32
  k_(std::max<uint8_t>(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
32
33
  hra_(hra),
@@ -34,26 +35,29 @@ max_nom_size_(0),
34
35
  num_retained_(0),
35
36
  n_(0),
36
37
  compactors_(allocator),
37
- min_value_(nullptr),
38
- max_value_(nullptr)
38
+ min_item_(nullptr),
39
+ max_item_(nullptr),
40
+ sorted_view_(nullptr)
39
41
  {
40
42
  grow();
41
43
  }
42
44
 
43
- template<typename T, typename C, typename S, typename A>
44
- req_sketch<T, C, S, A>::~req_sketch() {
45
- if (min_value_ != nullptr) {
46
- min_value_->~T();
47
- allocator_.deallocate(min_value_, 1);
45
+ template<typename T, typename C, typename A>
46
+ req_sketch<T, C, A>::~req_sketch() {
47
+ if (min_item_ != nullptr) {
48
+ min_item_->~T();
49
+ allocator_.deallocate(min_item_, 1);
48
50
  }
49
- if (max_value_ != nullptr) {
50
- max_value_->~T();
51
- allocator_.deallocate(max_value_, 1);
51
+ if (max_item_ != nullptr) {
52
+ max_item_->~T();
53
+ allocator_.deallocate(max_item_, 1);
52
54
  }
55
+ reset_sorted_view();
53
56
  }
54
57
 
55
- template<typename T, typename C, typename S, typename A>
56
- req_sketch<T, C, S, A>::req_sketch(const req_sketch& other):
58
+ template<typename T, typename C, typename A>
59
+ req_sketch<T, C, A>::req_sketch(const req_sketch& other):
60
+ comparator_(other.comparator_),
57
61
  allocator_(other.allocator_),
58
62
  k_(other.k_),
59
63
  hra_(other.hra_),
@@ -61,15 +65,17 @@ max_nom_size_(other.max_nom_size_),
61
65
  num_retained_(other.num_retained_),
62
66
  n_(other.n_),
63
67
  compactors_(other.compactors_),
64
- min_value_(nullptr),
65
- max_value_(nullptr)
68
+ min_item_(nullptr),
69
+ max_item_(nullptr),
70
+ sorted_view_(nullptr)
66
71
  {
67
- if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
68
- if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
72
+ if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
73
+ if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
69
74
  }
70
75
 
71
- template<typename T, typename C, typename S, typename A>
72
- req_sketch<T, C, S, A>::req_sketch(req_sketch&& other) noexcept :
76
+ template<typename T, typename C, typename A>
77
+ req_sketch<T, C, A>::req_sketch(req_sketch&& other) noexcept :
78
+ comparator_(std::move(other.comparator_)),
73
79
  allocator_(std::move(other.allocator_)),
74
80
  k_(other.k_),
75
81
  hra_(other.hra_),
@@ -77,16 +83,18 @@ max_nom_size_(other.max_nom_size_),
77
83
  num_retained_(other.num_retained_),
78
84
  n_(other.n_),
79
85
  compactors_(std::move(other.compactors_)),
80
- min_value_(other.min_value_),
81
- max_value_(other.max_value_)
86
+ min_item_(other.min_item_),
87
+ max_item_(other.max_item_),
88
+ sorted_view_(nullptr)
82
89
  {
83
- other.min_value_ = nullptr;
84
- other.max_value_ = nullptr;
90
+ other.min_item_ = nullptr;
91
+ other.max_item_ = nullptr;
85
92
  }
86
93
 
87
- template<typename T, typename C, typename S, typename A>
88
- req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(const req_sketch& other) {
94
+ template<typename T, typename C, typename A>
95
+ req_sketch<T, C, A>& req_sketch<T, C, A>::operator=(const req_sketch& other) {
89
96
  req_sketch copy(other);
97
+ std::swap(comparator_, copy.comparator_);
90
98
  std::swap(allocator_, copy.allocator_);
91
99
  std::swap(k_, copy.k_);
92
100
  std::swap(hra_, copy.hra_);
@@ -94,13 +102,15 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(const req_sketch& othe
94
102
  std::swap(num_retained_, copy.num_retained_);
95
103
  std::swap(n_, copy.n_);
96
104
  std::swap(compactors_, copy.compactors_);
97
- std::swap(min_value_, copy.min_value_);
98
- std::swap(max_value_, copy.max_value_);
105
+ std::swap(min_item_, copy.min_item_);
106
+ std::swap(max_item_, copy.max_item_);
107
+ reset_sorted_view();
99
108
  return *this;
100
109
  }
101
110
 
102
- template<typename T, typename C, typename S, typename A>
103
- req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
111
+ template<typename T, typename C, typename A>
112
+ req_sketch<T, C, A>& req_sketch<T, C, A>::operator=(req_sketch&& other) {
113
+ std::swap(comparator_, other.comparator_);
104
114
  std::swap(allocator_, other.allocator_);
105
115
  std::swap(k_, other.k_);
106
116
  std::swap(hra_, other.hra_);
@@ -108,14 +118,16 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
108
118
  std::swap(num_retained_, other.num_retained_);
109
119
  std::swap(n_, other.n_);
110
120
  std::swap(compactors_, other.compactors_);
111
- std::swap(min_value_, other.min_value_);
112
- std::swap(max_value_, other.max_value_);
121
+ std::swap(min_item_, other.min_item_);
122
+ std::swap(max_item_, other.max_item_);
123
+ reset_sorted_view();
113
124
  return *this;
114
125
  }
115
126
 
116
- template<typename T, typename C, typename S, typename A>
117
- template<typename TT, typename CC, typename SS, typename AA>
118
- req_sketch<T, C, S, A>::req_sketch(const req_sketch<TT, CC, SS, AA>& other, const A& allocator):
127
+ template<typename T, typename C, typename A>
128
+ template<typename TT, typename CC, typename AA>
129
+ req_sketch<T, C, A>::req_sketch(const req_sketch<TT, CC, AA>& other, const C& comparator, const A& allocator):
130
+ comparator_(comparator),
119
131
  allocator_(allocator),
120
132
  k_(other.k_),
121
133
  hra_(other.hra_),
@@ -123,8 +135,9 @@ max_nom_size_(other.max_nom_size_),
123
135
  num_retained_(other.num_retained_),
124
136
  n_(other.n_),
125
137
  compactors_(allocator),
126
- min_value_(nullptr),
127
- max_value_(nullptr)
138
+ min_item_(nullptr),
139
+ max_item_(nullptr),
140
+ sorted_view_(nullptr)
128
141
  {
129
142
  static_assert(
130
143
  std::is_constructible<T, TT>::value,
@@ -132,72 +145,73 @@ max_value_(nullptr)
132
145
  );
133
146
  compactors_.reserve(other.compactors_.size());
134
147
  for (const auto& compactor: other.compactors_) {
135
- compactors_.push_back(req_compactor<T, C, A>(compactor, allocator_));
148
+ compactors_.push_back(req_compactor<T, C, A>(compactor, comparator_, allocator_));
136
149
  }
137
150
  if (!other.is_empty()) {
138
- min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
139
- max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
151
+ min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
152
+ max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
140
153
  }
141
154
  }
142
155
 
143
- template<typename T, typename C, typename S, typename A>
144
- uint16_t req_sketch<T, C, S, A>::get_k() const {
156
+ template<typename T, typename C, typename A>
157
+ uint16_t req_sketch<T, C, A>::get_k() const {
145
158
  return k_;
146
159
  }
147
160
 
148
- template<typename T, typename C, typename S, typename A>
149
- bool req_sketch<T, C, S, A>::is_HRA() const {
161
+ template<typename T, typename C, typename A>
162
+ bool req_sketch<T, C, A>::is_HRA() const {
150
163
  return hra_;
151
164
  }
152
165
 
153
- template<typename T, typename C, typename S, typename A>
154
- bool req_sketch<T, C, S, A>::is_empty() const {
166
+ template<typename T, typename C, typename A>
167
+ bool req_sketch<T, C, A>::is_empty() const {
155
168
  return n_ == 0;
156
169
  }
157
170
 
158
- template<typename T, typename C, typename S, typename A>
159
- uint64_t req_sketch<T, C, S, A>::get_n() const {
171
+ template<typename T, typename C, typename A>
172
+ uint64_t req_sketch<T, C, A>::get_n() const {
160
173
  return n_;
161
174
  }
162
175
 
163
- template<typename T, typename C, typename S, typename A>
164
- uint32_t req_sketch<T, C, S, A>::get_num_retained() const {
176
+ template<typename T, typename C, typename A>
177
+ uint32_t req_sketch<T, C, A>::get_num_retained() const {
165
178
  return num_retained_;
166
179
  }
167
180
 
168
- template<typename T, typename C, typename S, typename A>
169
- bool req_sketch<T, C, S, A>::is_estimation_mode() const {
181
+ template<typename T, typename C, typename A>
182
+ bool req_sketch<T, C, A>::is_estimation_mode() const {
170
183
  return compactors_.size() > 1;
171
184
  }
172
185
 
173
- template<typename T, typename C, typename S, typename A>
186
+ template<typename T, typename C, typename A>
174
187
  template<typename FwdT>
175
- void req_sketch<T, C, S, A>::update(FwdT&& item) {
176
- if (!check_update_value(item)) { return; }
188
+ void req_sketch<T, C, A>::update(FwdT&& item) {
189
+ if (!check_update_item(item)) { return; }
177
190
  if (is_empty()) {
178
- min_value_ = new (allocator_.allocate(1)) T(item);
179
- max_value_ = new (allocator_.allocate(1)) T(item);
191
+ min_item_ = new (allocator_.allocate(1)) T(item);
192
+ max_item_ = new (allocator_.allocate(1)) T(item);
180
193
  } else {
181
- if (C()(item, *min_value_)) *min_value_ = item;
182
- if (C()(*max_value_, item)) *max_value_ = item;
194
+ if (comparator_(item, *min_item_)) *min_item_ = item;
195
+ if (comparator_(*max_item_, item)) *max_item_ = item;
183
196
  }
184
197
  compactors_[0].append(std::forward<FwdT>(item));
185
198
  ++num_retained_;
186
199
  ++n_;
187
200
  if (num_retained_ == max_nom_size_) compress();
201
+ reset_sorted_view();
188
202
  }
189
203
 
190
- template<typename T, typename C, typename S, typename A>
204
+ template<typename T, typename C, typename A>
191
205
  template<typename FwdSk>
192
- void req_sketch<T, C, S, A>::merge(FwdSk&& other) {
206
+ void req_sketch<T, C, A>::merge(FwdSk&& other) {
193
207
  if (is_HRA() != other.is_HRA()) throw std::invalid_argument("merging HRA and LRA is not valid");
194
208
  if (other.is_empty()) return;
195
209
  if (is_empty()) {
196
- min_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_value_));
197
- max_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_value_));
210
+ min_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_item_));
211
+ max_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_item_));
198
212
  } else {
199
- if (C()(*other.min_value_, *min_value_)) *min_value_ = conditional_forward<FwdSk>(*other.min_value_);
200
- if (C()(*max_value_, *other.max_value_)) *max_value_ = conditional_forward<FwdSk>(*other.max_value_);
213
+ if (comparator_(*other.min_item_, *min_item_)) *min_item_ = conditional_forward<FwdSk>(*other.min_item_);
214
+ if (comparator_(*max_item_, *other.max_item_)) *max_item_ = conditional_forward<FwdSk>(*other.max_item_);
201
215
  }
202
216
  // grow until this has at least as many compactors as other
203
217
  while (get_num_levels() < other.get_num_levels()) grow();
@@ -209,128 +223,117 @@ void req_sketch<T, C, S, A>::merge(FwdSk&& other) {
209
223
  update_max_nom_size();
210
224
  update_num_retained();
211
225
  if (num_retained_ >= max_nom_size_) compress();
226
+ reset_sorted_view();
212
227
  }
213
228
 
214
- template<typename T, typename C, typename S, typename A>
215
- const T& req_sketch<T, C, S, A>::get_min_value() const {
216
- if (is_empty()) return get_invalid_value();
217
- return *min_value_;
229
+ template<typename T, typename C, typename A>
230
+ const T& req_sketch<T, C, A>::get_min_item() const {
231
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
232
+ return *min_item_;
218
233
  }
219
234
 
220
- template<typename T, typename C, typename S, typename A>
221
- const T& req_sketch<T, C, S, A>::get_max_value() const {
222
- if (is_empty()) return get_invalid_value();
223
- return *max_value_;
235
+ template<typename T, typename C, typename A>
236
+ const T& req_sketch<T, C, A>::get_max_item() const {
237
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
238
+ return *max_item_;
224
239
  }
225
240
 
226
- template<typename T, typename C, typename S, typename A>
227
- C req_sketch<T, C, S, A>::get_comparator() const {
228
- return C();
241
+ template<typename T, typename C, typename A>
242
+ C req_sketch<T, C, A>::get_comparator() const {
243
+ return comparator_;
229
244
  }
230
245
 
231
- template<typename T, typename C, typename S, typename A>
232
- template<bool inclusive>
233
- double req_sketch<T, C, S, A>::get_rank(const T& item) const {
246
+ template<typename T, typename C, typename A>
247
+ A req_sketch<T, C, A>::get_allocator() const {
248
+ return allocator_;
249
+ }
250
+
251
+ template<typename T, typename C, typename A>
252
+ double req_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
253
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
234
254
  uint64_t weight = 0;
235
255
  for (const auto& compactor: compactors_) {
236
- weight += compactor.template compute_weight<inclusive>(item);
256
+ weight += compactor.compute_weight(item, inclusive);
237
257
  }
238
258
  return static_cast<double>(weight) / n_;
239
259
  }
240
260
 
241
- template<typename T, typename C, typename S, typename A>
242
- template<bool inclusive>
243
- auto req_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const -> vector_double {
244
- auto buckets = get_CDF<inclusive>(split_points, size);
245
- if (is_empty()) return buckets;
246
- for (uint32_t i = size; i > 0; --i) {
247
- buckets[i] -= buckets[i - 1];
248
- }
249
- return buckets;
261
+ template<typename T, typename C, typename A>
262
+ auto req_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
263
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
264
+ setup_sorted_view();
265
+ return sorted_view_->get_PMF(split_points, size, inclusive);
250
266
  }
251
267
 
252
- template<typename T, typename C, typename S, typename A>
253
- template<bool inclusive>
254
- auto req_sketch<T, C, S, A>::get_CDF(const T* split_points, uint32_t size) const -> vector_double {
255
- vector_double buckets(allocator_);
256
- if (is_empty()) return buckets;
257
- check_split_points(split_points, size);
258
- buckets.reserve(size + 1);
259
- for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank<inclusive>(split_points[i]));
260
- buckets.push_back(1);
261
- return buckets;
268
+ template<typename T, typename C, typename A>
269
+ auto req_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
270
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
271
+ setup_sorted_view();
272
+ return sorted_view_->get_CDF(split_points, size, inclusive);
262
273
  }
263
274
 
264
- template<typename T, typename C, typename S, typename A>
265
- template<bool inclusive>
266
- auto req_sketch<T, C, S, A>::get_quantile(double rank) const -> quantile_return_type {
267
- if (is_empty()) return get_invalid_value();
268
- if (rank == 0.0) return *min_value_;
269
- if (rank == 1.0) return *max_value_;
275
+ template<typename T, typename C, typename A>
276
+ auto req_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
277
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
270
278
  if ((rank < 0.0) || (rank > 1.0)) {
271
- throw std::invalid_argument("Rank cannot be less than zero or greater than 1.0");
279
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
272
280
  }
273
281
  // possible side-effect of sorting level zero
274
- return get_sorted_view<inclusive>(true).get_quantile(rank);
282
+ setup_sorted_view();
283
+ return sorted_view_->get_quantile(rank, inclusive);
275
284
  }
276
285
 
277
- template<typename T, typename C, typename S, typename A>
278
- template<bool inclusive>
279
- std::vector<T, A> req_sketch<T, C, S, A>::get_quantiles(const double* ranks, uint32_t size) const {
286
+ template<typename T, typename C, typename A>
287
+ std::vector<T, A> req_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
288
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
280
289
  std::vector<T, A> quantiles(allocator_);
281
- if (is_empty()) return quantiles;
282
290
  quantiles.reserve(size);
283
291
 
284
292
  // possible side-effect of sorting level zero
285
- auto view = get_sorted_view<inclusive>(true);
293
+ setup_sorted_view();
286
294
 
287
295
  for (uint32_t i = 0; i < size; ++i) {
288
296
  const double rank = ranks[i];
289
297
  if ((rank < 0.0) || (rank > 1.0)) {
290
- throw std::invalid_argument("rank cannot be less than zero or greater than 1.0");
291
- }
292
- if (rank == 0.0) quantiles.push_back(*min_value_);
293
- else if (rank == 1.0) quantiles.push_back(*max_value_);
294
- else {
295
- quantiles.push_back(view.get_quantile(rank));
298
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
296
299
  }
300
+ quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
297
301
  }
298
302
  return quantiles;
299
303
  }
300
304
 
301
- template<typename T, typename C, typename S, typename A>
302
- template<bool inclusive>
303
- quantile_sketch_sorted_view<T, C, A> req_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
305
+ template<typename T, typename C, typename A>
306
+ quantiles_sorted_view<T, C, A> req_sketch<T, C, A>::get_sorted_view() const {
304
307
  if (!compactors_[0].is_sorted()) {
305
308
  const_cast<Compactor&>(compactors_[0]).sort(); // allow this side effect
306
309
  }
307
- quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
310
+ quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
308
311
 
309
312
  for (auto& compactor: compactors_) {
310
313
  view.add(compactor.begin(), compactor.end(), 1 << compactor.get_lg_weight());
311
314
  }
312
315
 
313
- if (cumulative) view.template convert_to_cummulative<inclusive>();
316
+ view.convert_to_cummulative();
314
317
  return view;
315
318
  }
316
319
 
317
- template<typename T, typename C, typename S, typename A>
318
- double req_sketch<T, C, S, A>::get_rank_lower_bound(double rank, uint8_t num_std_dev) const {
320
+ template<typename T, typename C, typename A>
321
+ double req_sketch<T, C, A>::get_rank_lower_bound(double rank, uint8_t num_std_dev) const {
319
322
  return get_rank_lb(get_k(), get_num_levels(), rank, num_std_dev, get_n(), hra_);
320
323
  }
321
324
 
322
- template<typename T, typename C, typename S, typename A>
323
- double req_sketch<T, C, S, A>::get_rank_upper_bound(double rank, uint8_t num_std_dev) const {
325
+ template<typename T, typename C, typename A>
326
+ double req_sketch<T, C, A>::get_rank_upper_bound(double rank, uint8_t num_std_dev) const {
324
327
  return get_rank_ub(get_k(), get_num_levels(), rank, num_std_dev, get_n(), hra_);
325
328
  }
326
329
 
327
- template<typename T, typename C, typename S, typename A>
328
- double req_sketch<T, C, S, A>::get_RSE(uint16_t k, double rank, bool hra, uint64_t n) {
330
+ template<typename T, typename C, typename A>
331
+ double req_sketch<T, C, A>::get_RSE(uint16_t k, double rank, bool hra, uint64_t n) {
329
332
  return get_rank_lb(k, 2, rank, 1, n, hra);
330
333
  }
331
334
 
332
- template<typename T, typename C, typename S, typename A>
333
- double req_sketch<T, C, S, A>::get_rank_lb(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
335
+ template<typename T, typename C, typename A>
336
+ double req_sketch<T, C, A>::get_rank_lb(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
334
337
  if (is_exact_rank(k, num_levels, rank, n, hra)) return rank;
335
338
  const double relative = relative_rse_factor() / k * (hra ? 1.0 - rank : rank);
336
339
  const double fixed = FIXED_RSE_FACTOR / k;
@@ -339,8 +342,8 @@ double req_sketch<T, C, S, A>::get_rank_lb(uint16_t k, uint8_t num_levels, doubl
339
342
  return std::max(lb_rel, lb_fix);
340
343
  }
341
344
 
342
- template<typename T, typename C, typename S, typename A>
343
- double req_sketch<T, C, S, A>::get_rank_ub(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
345
+ template<typename T, typename C, typename A>
346
+ double req_sketch<T, C, A>::get_rank_ub(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
344
347
  if (is_exact_rank(k, num_levels, rank, n, hra)) return rank;
345
348
  const double relative = relative_rse_factor() / k * (hra ? 1.0 - rank : rank);
346
349
  const double fixed = FIXED_RSE_FACTOR / k;
@@ -349,23 +352,23 @@ double req_sketch<T, C, S, A>::get_rank_ub(uint16_t k, uint8_t num_levels, doubl
349
352
  return std::min(ub_rel, ub_fix);
350
353
  }
351
354
 
352
- template<typename T, typename C, typename S, typename A>
353
- bool req_sketch<T, C, S, A>::is_exact_rank(uint16_t k, uint8_t num_levels, double rank, uint64_t n, bool hra) {
355
+ template<typename T, typename C, typename A>
356
+ bool req_sketch<T, C, A>::is_exact_rank(uint16_t k, uint8_t num_levels, double rank, uint64_t n, bool hra) {
354
357
  const unsigned base_cap = k * req_constants::INIT_NUM_SECTIONS;
355
358
  if (num_levels == 1 || n <= base_cap) return true;
356
359
  const double exact_rank_thresh = static_cast<double>(base_cap) / n;
357
360
  return (hra && rank >= 1.0 - exact_rank_thresh) || (!hra && rank <= exact_rank_thresh);
358
361
  }
359
362
 
360
- template<typename T, typename C, typename S, typename A>
361
- double req_sketch<T, C, S, A>::relative_rse_factor() {
363
+ template<typename T, typename C, typename A>
364
+ double req_sketch<T, C, A>::relative_rse_factor() {
362
365
  return sqrt(0.0512 / req_constants::INIT_NUM_SECTIONS);
363
366
  }
364
367
 
365
368
  // implementation for fixed-size arithmetic types (integral and floating point)
366
- template<typename T, typename C, typename S, typename A>
369
+ template<typename T, typename C, typename A>
367
370
  template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
368
- size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
371
+ size_t req_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
369
372
  size_t size = PREAMBLE_SIZE_BYTES;
370
373
  if (is_empty()) return size;
371
374
  if (is_estimation_mode()) {
@@ -380,15 +383,15 @@ size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const
380
383
  }
381
384
 
382
385
  // implementation for all other types
383
- template<typename T, typename C, typename S, typename A>
386
+ template<typename T, typename C, typename A>
384
387
  template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
385
- size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
388
+ size_t req_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
386
389
  size_t size = PREAMBLE_SIZE_BYTES;
387
390
  if (is_empty()) return size;
388
391
  if (is_estimation_mode()) {
389
392
  size += sizeof(n_);
390
- size += sd.size_of_item(*min_value_);
391
- size += sd.size_of_item(*max_value_);
393
+ size += sd.size_of_item(*min_item_);
394
+ size += sd.size_of_item(*max_item_);
392
395
  }
393
396
  if (n_ == 1) {
394
397
  size += sd.size_of_item(*compactors_[0].begin());
@@ -398,9 +401,9 @@ size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const
398
401
  return size;
399
402
  }
400
403
 
401
- template<typename T, typename C, typename S, typename A>
404
+ template<typename T, typename C, typename A>
402
405
  template<typename SerDe>
403
- void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
406
+ void req_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
404
407
  const uint8_t preamble_ints = is_estimation_mode() ? 4 : 2;
405
408
  write(os, preamble_ints);
406
409
  const uint8_t serial_version = SERIAL_VERSION;
@@ -423,8 +426,8 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
423
426
  if (is_empty()) return;
424
427
  if (is_estimation_mode()) {
425
428
  write(os, n_);
426
- sd.serialize(os, min_value_, 1);
427
- sd.serialize(os, max_value_, 1);
429
+ sd.serialize(os, min_item_, 1);
430
+ sd.serialize(os, max_item_, 1);
428
431
  }
429
432
  if (raw_items) {
430
433
  sd.serialize(os, compactors_[0].begin(), num_raw_items);
@@ -433,9 +436,9 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
433
436
  }
434
437
  }
435
438
 
436
- template<typename T, typename C, typename S, typename A>
439
+ template<typename T, typename C, typename A>
437
440
  template<typename SerDe>
438
- auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
441
+ auto req_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
439
442
  const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
440
443
  vector_bytes bytes(size, 0, allocator_);
441
444
  uint8_t* ptr = bytes.data() + header_size_bytes;
@@ -463,8 +466,8 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe&
463
466
  if (!is_empty()) {
464
467
  if (is_estimation_mode()) {
465
468
  ptr += copy_to_mem(n_, ptr);
466
- ptr += sd.serialize(ptr, end_ptr - ptr, min_value_, 1);
467
- ptr += sd.serialize(ptr, end_ptr - ptr, max_value_, 1);
469
+ ptr += sd.serialize(ptr, end_ptr - ptr, min_item_, 1);
470
+ ptr += sd.serialize(ptr, end_ptr - ptr, max_item_, 1);
468
471
  }
469
472
  if (raw_items) {
470
473
  ptr += sd.serialize(ptr, end_ptr - ptr, compactors_[0].begin(), num_raw_items);
@@ -475,14 +478,9 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe&
475
478
  return bytes;
476
479
  }
477
480
 
478
- template<typename T, typename C, typename S, typename A>
479
- req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
480
- return deserialize(is, S(), allocator);
481
- }
482
-
483
- template<typename T, typename C, typename S, typename A>
481
+ template<typename T, typename C, typename A>
484
482
  template<typename SerDe>
485
- req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
483
+ req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(std::istream& is, const SerDe& sd, const C& comparator, const A& allocator) {
486
484
  const auto preamble_ints = read<uint8_t>(is);
487
485
  const auto serial_version = read<uint8_t>(is);
488
486
  const auto family_id = read<uint8_t>(is);
@@ -498,14 +496,14 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
498
496
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
499
497
  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
500
498
  const bool hra = flags_byte & (1 << flags::IS_HIGH_RANK);
501
- if (is_empty) return req_sketch(k, hra, allocator);
499
+ if (is_empty) return req_sketch(k, hra, comparator, allocator);
502
500
 
503
501
  A alloc(allocator);
504
502
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
505
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
506
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
507
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
508
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
503
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
504
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
505
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
506
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
509
507
 
510
508
  const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
511
509
  const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
@@ -514,19 +512,19 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
514
512
  uint64_t n = 1;
515
513
  if (num_levels > 1) {
516
514
  n = read<uint64_t>(is);
517
- sd.deserialize(is, min_value_buffer.get(), 1);
515
+ sd.deserialize(is, min_item_buffer.get(), 1);
518
516
  // serde call did not throw, repackage with destructor
519
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
520
- sd.deserialize(is, max_value_buffer.get(), 1);
517
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
518
+ sd.deserialize(is, max_item_buffer.get(), 1);
521
519
  // serde call did not throw, repackage with destructor
522
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
520
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
523
521
  }
524
522
 
525
523
  if (raw_items) {
526
- compactors.push_back(Compactor::deserialize(is, sd, allocator, is_level_0_sorted, k, num_raw_items, hra));
524
+ compactors.push_back(Compactor::deserialize(is, sd, comparator, allocator, is_level_0_sorted, k, num_raw_items, hra));
527
525
  } else {
528
526
  for (size_t i = 0; i < num_levels; ++i) {
529
- compactors.push_back(Compactor::deserialize(is, sd, allocator, i == 0 ? is_level_0_sorted : true, hra));
527
+ compactors.push_back(Compactor::deserialize(is, sd, comparator, allocator, i == 0 ? is_level_0_sorted : true, hra));
530
528
  }
531
529
  }
532
530
  if (num_levels == 1) {
@@ -536,29 +534,24 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
536
534
  auto min_it = begin;
537
535
  auto max_it = begin;
538
536
  for (auto it = begin; it != end; ++it) {
539
- if (C()(*it, *min_it)) min_it = it;
540
- if (C()(*max_it, *it)) max_it = it;
537
+ if (comparator(*it, *min_it)) min_it = it;
538
+ if (comparator(*max_it, *it)) max_it = it;
541
539
  }
542
- new (min_value_buffer.get()) T(*min_it);
540
+ new (min_item_buffer.get()) T(*min_it);
543
541
  // copy did not throw, repackage with destructor
544
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
545
- new (max_value_buffer.get()) T(*max_it);
542
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
543
+ new (max_item_buffer.get()) T(*max_it);
546
544
  // copy did not throw, repackage with destructor
547
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
545
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
548
546
  }
549
547
 
550
548
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
551
- return req_sketch(k, hra, n, std::move(min_value), std::move(max_value), std::move(compactors));
552
- }
553
-
554
- template<typename T, typename C, typename S, typename A>
555
- req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
556
- return deserialize(bytes, size, S(), allocator);
549
+ return req_sketch(k, hra, n, std::move(min_item), std::move(max_item), std::move(compactors), comparator);
557
550
  }
558
551
 
559
- template<typename T, typename C, typename S, typename A>
552
+ template<typename T, typename C, typename A>
560
553
  template<typename SerDe>
561
- req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
554
+ req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const C& comparator, const A& allocator) {
562
555
  ensure_minimum_memory(size, 8);
563
556
  const char* ptr = static_cast<const char*>(bytes);
564
557
  const char* end_ptr = static_cast<const char*>(bytes) + size;
@@ -584,14 +577,14 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
584
577
 
585
578
  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
586
579
  const bool hra = flags_byte & (1 << flags::IS_HIGH_RANK);
587
- if (is_empty) return req_sketch(k, hra, allocator);
580
+ if (is_empty) return req_sketch(k, hra, comparator, allocator);
588
581
 
589
582
  A alloc(allocator);
590
583
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
591
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
592
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
593
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
594
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
584
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
585
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
586
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
587
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
595
588
 
596
589
  const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
597
590
  const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
@@ -601,21 +594,21 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
601
594
  if (num_levels > 1) {
602
595
  ensure_minimum_memory(end_ptr - ptr, sizeof(n));
603
596
  ptr += copy_from_mem(ptr, n);
604
- ptr += sd.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
597
+ ptr += sd.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
605
598
  // serde call did not throw, repackage with destructor
606
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
607
- ptr += sd.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
599
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
600
+ ptr += sd.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
608
601
  // serde call did not throw, repackage with destructor
609
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
602
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
610
603
  }
611
604
 
612
605
  if (raw_items) {
613
- auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, allocator, is_level_0_sorted, k, num_raw_items, hra);
606
+ auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, comparator, allocator, is_level_0_sorted, k, num_raw_items, hra);
614
607
  compactors.push_back(std::move(pair.first));
615
608
  ptr += pair.second;
616
609
  } else {
617
610
  for (size_t i = 0; i < num_levels; ++i) {
618
- auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, allocator, i == 0 ? is_level_0_sorted : true, hra);
611
+ auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, comparator, allocator, i == 0 ? is_level_0_sorted : true, hra);
619
612
  compactors.push_back(std::move(pair.first));
620
613
  ptr += pair.second;
621
614
  }
@@ -627,46 +620,46 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
627
620
  auto min_it = begin;
628
621
  auto max_it = begin;
629
622
  for (auto it = begin; it != end; ++it) {
630
- if (C()(*it, *min_it)) min_it = it;
631
- if (C()(*max_it, *it)) max_it = it;
623
+ if (comparator(*it, *min_it)) min_it = it;
624
+ if (comparator(*max_it, *it)) max_it = it;
632
625
  }
633
- new (min_value_buffer.get()) T(*min_it);
626
+ new (min_item_buffer.get()) T(*min_it);
634
627
  // copy did not throw, repackage with destructor
635
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
636
- new (max_value_buffer.get()) T(*max_it);
628
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
629
+ new (max_item_buffer.get()) T(*max_it);
637
630
  // copy did not throw, repackage with destructor
638
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
631
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
639
632
  }
640
633
 
641
- return req_sketch(k, hra, n, std::move(min_value), std::move(max_value), std::move(compactors));
634
+ return req_sketch(k, hra, n, std::move(min_item), std::move(max_item), std::move(compactors), comparator);
642
635
  }
643
636
 
644
- template<typename T, typename C, typename S, typename A>
645
- void req_sketch<T, C, S, A>::grow() {
637
+ template<typename T, typename C, typename A>
638
+ void req_sketch<T, C, A>::grow() {
646
639
  const uint8_t lg_weight = get_num_levels();
647
- compactors_.push_back(Compactor(hra_, lg_weight, k_, allocator_));
640
+ compactors_.push_back(Compactor(hra_, lg_weight, k_, comparator_, allocator_));
648
641
  update_max_nom_size();
649
642
  }
650
643
 
651
- template<typename T, typename C, typename S, typename A>
652
- uint8_t req_sketch<T, C, S, A>::get_num_levels() const {
644
+ template<typename T, typename C, typename A>
645
+ uint8_t req_sketch<T, C, A>::get_num_levels() const {
653
646
  return static_cast<uint8_t>(compactors_.size());
654
647
  }
655
648
 
656
- template<typename T, typename C, typename S, typename A>
657
- void req_sketch<T, C, S, A>::update_max_nom_size() {
649
+ template<typename T, typename C, typename A>
650
+ void req_sketch<T, C, A>::update_max_nom_size() {
658
651
  max_nom_size_ = 0;
659
652
  for (const auto& compactor: compactors_) max_nom_size_ += compactor.get_nom_capacity();
660
653
  }
661
654
 
662
- template<typename T, typename C, typename S, typename A>
663
- void req_sketch<T, C, S, A>::update_num_retained() {
655
+ template<typename T, typename C, typename A>
656
+ void req_sketch<T, C, A>::update_num_retained() {
664
657
  num_retained_ = 0;
665
658
  for (const auto& compactor: compactors_) num_retained_ += compactor.get_num_items();
666
659
  }
667
660
 
668
- template<typename T, typename C, typename S, typename A>
669
- void req_sketch<T, C, S, A>::compress() {
661
+ template<typename T, typename C, typename A>
662
+ void req_sketch<T, C, A>::compress() {
670
663
  for (size_t h = 0; h < compactors_.size(); ++h) {
671
664
  if (compactors_[h].get_num_items() >= compactors_[h].get_nom_capacity()) {
672
665
  if (h == 0) compactors_[0].sort();
@@ -681,8 +674,8 @@ void req_sketch<T, C, S, A>::compress() {
681
674
  }
682
675
  }
683
676
 
684
- template<typename T, typename C, typename S, typename A>
685
- string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
677
+ template<typename T, typename C, typename A>
678
+ string<A> req_sketch<T, C, A>::to_string(bool print_levels, bool print_items) const {
686
679
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
687
680
  // The stream does not support passing an allocator instance, and alternatives are complicated.
688
681
  std::ostringstream os;
@@ -697,8 +690,8 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
697
690
  os << " Retained items : " << num_retained_ << std::endl;
698
691
  os << " Capacity items : " << max_nom_size_ << std::endl;
699
692
  if (!is_empty()) {
700
- os << " Min value : " << *min_value_ << std::endl;
701
- os << " Max value : " << *max_value_ << std::endl;
693
+ os << " Min item : " << *min_item_ << std::endl;
694
+ os << " Max item : " << *max_item_ << std::endl;
702
695
  }
703
696
  os << "### End sketch summary" << std::endl;
704
697
 
@@ -728,8 +721,8 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
728
721
  return string<A>(os.str().c_str(), allocator_);
729
722
  }
730
723
 
731
- template<typename T, typename C, typename S, typename A>
732
- class req_sketch<T, C, S, A>::item_deleter {
724
+ template<typename T, typename C, typename A>
725
+ class req_sketch<T, C, A>::item_deleter {
733
726
  public:
734
727
  item_deleter(const A& allocator): allocator_(allocator) {}
735
728
  void operator() (T* ptr) {
@@ -742,8 +735,11 @@ class req_sketch<T, C, S, A>::item_deleter {
742
735
  A allocator_;
743
736
  };
744
737
 
745
- template<typename T, typename C, typename S, typename A>
746
- req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors):
738
+ template<typename T, typename C, typename A>
739
+ req_sketch<T, C, A>::req_sketch(uint16_t k, bool hra, uint64_t n,
740
+ std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
741
+ std::vector<Compactor, AllocCompactor>&& compactors, const C& comparator):
742
+ comparator_(comparator),
747
743
  allocator_(compactors.get_allocator()),
748
744
  k_(k),
749
745
  hra_(hra),
@@ -751,15 +747,16 @@ max_nom_size_(0),
751
747
  num_retained_(0),
752
748
  n_(n),
753
749
  compactors_(std::move(compactors)),
754
- min_value_(min_value.release()),
755
- max_value_(max_value.release())
750
+ min_item_(min_item.release()),
751
+ max_item_(max_item.release()),
752
+ sorted_view_(nullptr)
756
753
  {
757
754
  update_max_nom_size();
758
755
  update_num_retained();
759
756
  }
760
757
 
761
- template<typename T, typename C, typename S, typename A>
762
- void req_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels) {
758
+ template<typename T, typename C, typename A>
759
+ void req_sketch<T, C, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels) {
763
760
  const uint8_t expected_preamble_ints = num_levels > 1 ? 4 : 2;
764
761
  if (preamble_ints != expected_preamble_ints) {
765
762
  throw std::invalid_argument("Possible corruption: preamble ints must be "
@@ -767,8 +764,8 @@ void req_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t
767
764
  }
768
765
  }
769
766
 
770
- template<typename T, typename C, typename S, typename A>
771
- void req_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
767
+ template<typename T, typename C, typename A>
768
+ void req_sketch<T, C, A>::check_serial_version(uint8_t serial_version) {
772
769
  if (serial_version != SERIAL_VERSION) {
773
770
  throw std::invalid_argument("Possible corruption: serial version mismatch: expected "
774
771
  + std::to_string(SERIAL_VERSION)
@@ -776,35 +773,53 @@ void req_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
776
773
  }
777
774
  }
778
775
 
779
- template<typename T, typename C, typename S, typename A>
780
- void req_sketch<T, C, S, A>::check_family_id(uint8_t family_id) {
776
+ template<typename T, typename C, typename A>
777
+ void req_sketch<T, C, A>::check_family_id(uint8_t family_id) {
781
778
  if (family_id != FAMILY) {
782
779
  throw std::invalid_argument("Possible corruption: family mismatch: expected "
783
780
  + std::to_string(FAMILY) + ", got " + std::to_string(family_id));
784
781
  }
785
782
  }
786
783
 
787
- template<typename T, typename C, typename S, typename A>
788
- auto req_sketch<T, C, S, A>::begin() const -> const_iterator {
784
+ template<typename T, typename C, typename A>
785
+ auto req_sketch<T, C, A>::begin() const -> const_iterator {
789
786
  return const_iterator(compactors_.begin(), compactors_.end());
790
787
  }
791
788
 
792
- template<typename T, typename C, typename S, typename A>
793
- auto req_sketch<T, C, S, A>::end() const -> const_iterator {
789
+ template<typename T, typename C, typename A>
790
+ auto req_sketch<T, C, A>::end() const -> const_iterator {
794
791
  return const_iterator(compactors_.end(), compactors_.end());
795
792
  }
796
793
 
794
+ template<typename T, typename C, typename A>
795
+ void req_sketch<T, C, A>::setup_sorted_view() const {
796
+ if (sorted_view_ == nullptr) {
797
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
798
+ sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
799
+ }
800
+ }
801
+
802
+ template<typename T, typename C, typename A>
803
+ void req_sketch<T, C, A>::reset_sorted_view() {
804
+ if (sorted_view_ != nullptr) {
805
+ sorted_view_->~quantiles_sorted_view();
806
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
807
+ AllocSortedView(allocator_).deallocate(sorted_view_, 1);
808
+ sorted_view_ = nullptr;
809
+ }
810
+ }
811
+
797
812
  // iterator
798
813
 
799
- template<typename T, typename C, typename S, typename A>
800
- req_sketch<T, C, S, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
814
+ template<typename T, typename C, typename A>
815
+ req_sketch<T, C, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
801
816
  levels_it_(begin),
802
817
  levels_end_(end),
803
818
  compactor_it_(begin == end ? nullptr : (*levels_it_).begin())
804
819
  {}
805
820
 
806
- template<typename T, typename C, typename S, typename A>
807
- auto req_sketch<T, C, S, A>::const_iterator::operator++() -> const_iterator& {
821
+ template<typename T, typename C, typename A>
822
+ auto req_sketch<T, C, A>::const_iterator::operator++() -> const_iterator& {
808
823
  ++compactor_it_;
809
824
  if (compactor_it_ == (*levels_it_).end()) {
810
825
  ++levels_it_;
@@ -813,28 +828,33 @@ auto req_sketch<T, C, S, A>::const_iterator::operator++() -> const_iterator& {
813
828
  return *this;
814
829
  }
815
830
 
816
- template<typename T, typename C, typename S, typename A>
817
- auto req_sketch<T, C, S, A>::const_iterator::operator++(int) -> const_iterator& {
831
+ template<typename T, typename C, typename A>
832
+ auto req_sketch<T, C, A>::const_iterator::operator++(int) -> const_iterator& {
818
833
  const_iterator tmp(*this);
819
834
  operator++();
820
835
  return tmp;
821
836
  }
822
837
 
823
- template<typename T, typename C, typename S, typename A>
824
- bool req_sketch<T, C, S, A>::const_iterator::operator==(const const_iterator& other) const {
838
+ template<typename T, typename C, typename A>
839
+ bool req_sketch<T, C, A>::const_iterator::operator==(const const_iterator& other) const {
825
840
  if (levels_it_ != other.levels_it_) return false;
826
841
  if (levels_it_ == levels_end_) return true;
827
842
  return compactor_it_ == other.compactor_it_;
828
843
  }
829
844
 
830
- template<typename T, typename C, typename S, typename A>
831
- bool req_sketch<T, C, S, A>::const_iterator::operator!=(const const_iterator& other) const {
845
+ template<typename T, typename C, typename A>
846
+ bool req_sketch<T, C, A>::const_iterator::operator!=(const const_iterator& other) const {
832
847
  return !operator==(other);
833
848
  }
834
849
 
835
- template<typename T, typename C, typename S, typename A>
836
- std::pair<const T&, const uint64_t> req_sketch<T, C, S, A>::const_iterator::operator*() const {
837
- return std::pair<const T&, const uint64_t>(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
850
+ template<typename T, typename C, typename A>
851
+ auto req_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
852
+ return value_type(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
853
+ }
854
+
855
+ template<typename T, typename C, typename A>
856
+ auto req_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
857
+ return **this;
838
858
  }
839
859
 
840
860
  } /* namespace datasketches */