datasketches 0.2.7 → 0.3.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -25,8 +25,9 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
- template<typename T, typename C, typename S, typename A>
29
- req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, const A& allocator):
28
+ template<typename T, typename C, typename A>
29
+ req_sketch<T, C, A>::req_sketch(uint16_t k, bool hra, const C& comparator, const A& allocator):
30
+ comparator_(comparator),
30
31
  allocator_(allocator),
31
32
  k_(std::max<uint8_t>(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
32
33
  hra_(hra),
@@ -34,26 +35,29 @@ max_nom_size_(0),
34
35
  num_retained_(0),
35
36
  n_(0),
36
37
  compactors_(allocator),
37
- min_value_(nullptr),
38
- max_value_(nullptr)
38
+ min_item_(nullptr),
39
+ max_item_(nullptr),
40
+ sorted_view_(nullptr)
39
41
  {
40
42
  grow();
41
43
  }
42
44
 
43
- template<typename T, typename C, typename S, typename A>
44
- req_sketch<T, C, S, A>::~req_sketch() {
45
- if (min_value_ != nullptr) {
46
- min_value_->~T();
47
- allocator_.deallocate(min_value_, 1);
45
+ template<typename T, typename C, typename A>
46
+ req_sketch<T, C, A>::~req_sketch() {
47
+ if (min_item_ != nullptr) {
48
+ min_item_->~T();
49
+ allocator_.deallocate(min_item_, 1);
48
50
  }
49
- if (max_value_ != nullptr) {
50
- max_value_->~T();
51
- allocator_.deallocate(max_value_, 1);
51
+ if (max_item_ != nullptr) {
52
+ max_item_->~T();
53
+ allocator_.deallocate(max_item_, 1);
52
54
  }
55
+ reset_sorted_view();
53
56
  }
54
57
 
55
- template<typename T, typename C, typename S, typename A>
56
- req_sketch<T, C, S, A>::req_sketch(const req_sketch& other):
58
+ template<typename T, typename C, typename A>
59
+ req_sketch<T, C, A>::req_sketch(const req_sketch& other):
60
+ comparator_(other.comparator_),
57
61
  allocator_(other.allocator_),
58
62
  k_(other.k_),
59
63
  hra_(other.hra_),
@@ -61,15 +65,17 @@ max_nom_size_(other.max_nom_size_),
61
65
  num_retained_(other.num_retained_),
62
66
  n_(other.n_),
63
67
  compactors_(other.compactors_),
64
- min_value_(nullptr),
65
- max_value_(nullptr)
68
+ min_item_(nullptr),
69
+ max_item_(nullptr),
70
+ sorted_view_(nullptr)
66
71
  {
67
- if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
68
- if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
72
+ if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
73
+ if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
69
74
  }
70
75
 
71
- template<typename T, typename C, typename S, typename A>
72
- req_sketch<T, C, S, A>::req_sketch(req_sketch&& other) noexcept :
76
+ template<typename T, typename C, typename A>
77
+ req_sketch<T, C, A>::req_sketch(req_sketch&& other) noexcept :
78
+ comparator_(std::move(other.comparator_)),
73
79
  allocator_(std::move(other.allocator_)),
74
80
  k_(other.k_),
75
81
  hra_(other.hra_),
@@ -77,16 +83,18 @@ max_nom_size_(other.max_nom_size_),
77
83
  num_retained_(other.num_retained_),
78
84
  n_(other.n_),
79
85
  compactors_(std::move(other.compactors_)),
80
- min_value_(other.min_value_),
81
- max_value_(other.max_value_)
86
+ min_item_(other.min_item_),
87
+ max_item_(other.max_item_),
88
+ sorted_view_(nullptr)
82
89
  {
83
- other.min_value_ = nullptr;
84
- other.max_value_ = nullptr;
90
+ other.min_item_ = nullptr;
91
+ other.max_item_ = nullptr;
85
92
  }
86
93
 
87
- template<typename T, typename C, typename S, typename A>
88
- req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(const req_sketch& other) {
94
+ template<typename T, typename C, typename A>
95
+ req_sketch<T, C, A>& req_sketch<T, C, A>::operator=(const req_sketch& other) {
89
96
  req_sketch copy(other);
97
+ std::swap(comparator_, copy.comparator_);
90
98
  std::swap(allocator_, copy.allocator_);
91
99
  std::swap(k_, copy.k_);
92
100
  std::swap(hra_, copy.hra_);
@@ -94,13 +102,15 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(const req_sketch& othe
94
102
  std::swap(num_retained_, copy.num_retained_);
95
103
  std::swap(n_, copy.n_);
96
104
  std::swap(compactors_, copy.compactors_);
97
- std::swap(min_value_, copy.min_value_);
98
- std::swap(max_value_, copy.max_value_);
105
+ std::swap(min_item_, copy.min_item_);
106
+ std::swap(max_item_, copy.max_item_);
107
+ reset_sorted_view();
99
108
  return *this;
100
109
  }
101
110
 
102
- template<typename T, typename C, typename S, typename A>
103
- req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
111
+ template<typename T, typename C, typename A>
112
+ req_sketch<T, C, A>& req_sketch<T, C, A>::operator=(req_sketch&& other) {
113
+ std::swap(comparator_, other.comparator_);
104
114
  std::swap(allocator_, other.allocator_);
105
115
  std::swap(k_, other.k_);
106
116
  std::swap(hra_, other.hra_);
@@ -108,14 +118,16 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
108
118
  std::swap(num_retained_, other.num_retained_);
109
119
  std::swap(n_, other.n_);
110
120
  std::swap(compactors_, other.compactors_);
111
- std::swap(min_value_, other.min_value_);
112
- std::swap(max_value_, other.max_value_);
121
+ std::swap(min_item_, other.min_item_);
122
+ std::swap(max_item_, other.max_item_);
123
+ reset_sorted_view();
113
124
  return *this;
114
125
  }
115
126
 
116
- template<typename T, typename C, typename S, typename A>
117
- template<typename TT, typename CC, typename SS, typename AA>
118
- req_sketch<T, C, S, A>::req_sketch(const req_sketch<TT, CC, SS, AA>& other, const A& allocator):
127
+ template<typename T, typename C, typename A>
128
+ template<typename TT, typename CC, typename AA>
129
+ req_sketch<T, C, A>::req_sketch(const req_sketch<TT, CC, AA>& other, const C& comparator, const A& allocator):
130
+ comparator_(comparator),
119
131
  allocator_(allocator),
120
132
  k_(other.k_),
121
133
  hra_(other.hra_),
@@ -123,8 +135,9 @@ max_nom_size_(other.max_nom_size_),
123
135
  num_retained_(other.num_retained_),
124
136
  n_(other.n_),
125
137
  compactors_(allocator),
126
- min_value_(nullptr),
127
- max_value_(nullptr)
138
+ min_item_(nullptr),
139
+ max_item_(nullptr),
140
+ sorted_view_(nullptr)
128
141
  {
129
142
  static_assert(
130
143
  std::is_constructible<T, TT>::value,
@@ -132,72 +145,73 @@ max_value_(nullptr)
132
145
  );
133
146
  compactors_.reserve(other.compactors_.size());
134
147
  for (const auto& compactor: other.compactors_) {
135
- compactors_.push_back(req_compactor<T, C, A>(compactor, allocator_));
148
+ compactors_.push_back(req_compactor<T, C, A>(compactor, comparator_, allocator_));
136
149
  }
137
150
  if (!other.is_empty()) {
138
- min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
139
- max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
151
+ min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
152
+ max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
140
153
  }
141
154
  }
142
155
 
143
- template<typename T, typename C, typename S, typename A>
144
- uint16_t req_sketch<T, C, S, A>::get_k() const {
156
+ template<typename T, typename C, typename A>
157
+ uint16_t req_sketch<T, C, A>::get_k() const {
145
158
  return k_;
146
159
  }
147
160
 
148
- template<typename T, typename C, typename S, typename A>
149
- bool req_sketch<T, C, S, A>::is_HRA() const {
161
+ template<typename T, typename C, typename A>
162
+ bool req_sketch<T, C, A>::is_HRA() const {
150
163
  return hra_;
151
164
  }
152
165
 
153
- template<typename T, typename C, typename S, typename A>
154
- bool req_sketch<T, C, S, A>::is_empty() const {
166
+ template<typename T, typename C, typename A>
167
+ bool req_sketch<T, C, A>::is_empty() const {
155
168
  return n_ == 0;
156
169
  }
157
170
 
158
- template<typename T, typename C, typename S, typename A>
159
- uint64_t req_sketch<T, C, S, A>::get_n() const {
171
+ template<typename T, typename C, typename A>
172
+ uint64_t req_sketch<T, C, A>::get_n() const {
160
173
  return n_;
161
174
  }
162
175
 
163
- template<typename T, typename C, typename S, typename A>
164
- uint32_t req_sketch<T, C, S, A>::get_num_retained() const {
176
+ template<typename T, typename C, typename A>
177
+ uint32_t req_sketch<T, C, A>::get_num_retained() const {
165
178
  return num_retained_;
166
179
  }
167
180
 
168
- template<typename T, typename C, typename S, typename A>
169
- bool req_sketch<T, C, S, A>::is_estimation_mode() const {
181
+ template<typename T, typename C, typename A>
182
+ bool req_sketch<T, C, A>::is_estimation_mode() const {
170
183
  return compactors_.size() > 1;
171
184
  }
172
185
 
173
- template<typename T, typename C, typename S, typename A>
186
+ template<typename T, typename C, typename A>
174
187
  template<typename FwdT>
175
- void req_sketch<T, C, S, A>::update(FwdT&& item) {
176
- if (!check_update_value(item)) { return; }
188
+ void req_sketch<T, C, A>::update(FwdT&& item) {
189
+ if (!check_update_item(item)) { return; }
177
190
  if (is_empty()) {
178
- min_value_ = new (allocator_.allocate(1)) T(item);
179
- max_value_ = new (allocator_.allocate(1)) T(item);
191
+ min_item_ = new (allocator_.allocate(1)) T(item);
192
+ max_item_ = new (allocator_.allocate(1)) T(item);
180
193
  } else {
181
- if (C()(item, *min_value_)) *min_value_ = item;
182
- if (C()(*max_value_, item)) *max_value_ = item;
194
+ if (comparator_(item, *min_item_)) *min_item_ = item;
195
+ if (comparator_(*max_item_, item)) *max_item_ = item;
183
196
  }
184
197
  compactors_[0].append(std::forward<FwdT>(item));
185
198
  ++num_retained_;
186
199
  ++n_;
187
200
  if (num_retained_ == max_nom_size_) compress();
201
+ reset_sorted_view();
188
202
  }
189
203
 
190
- template<typename T, typename C, typename S, typename A>
204
+ template<typename T, typename C, typename A>
191
205
  template<typename FwdSk>
192
- void req_sketch<T, C, S, A>::merge(FwdSk&& other) {
206
+ void req_sketch<T, C, A>::merge(FwdSk&& other) {
193
207
  if (is_HRA() != other.is_HRA()) throw std::invalid_argument("merging HRA and LRA is not valid");
194
208
  if (other.is_empty()) return;
195
209
  if (is_empty()) {
196
- min_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_value_));
197
- max_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_value_));
210
+ min_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_item_));
211
+ max_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_item_));
198
212
  } else {
199
- if (C()(*other.min_value_, *min_value_)) *min_value_ = conditional_forward<FwdSk>(*other.min_value_);
200
- if (C()(*max_value_, *other.max_value_)) *max_value_ = conditional_forward<FwdSk>(*other.max_value_);
213
+ if (comparator_(*other.min_item_, *min_item_)) *min_item_ = conditional_forward<FwdSk>(*other.min_item_);
214
+ if (comparator_(*max_item_, *other.max_item_)) *max_item_ = conditional_forward<FwdSk>(*other.max_item_);
201
215
  }
202
216
  // grow until this has at least as many compactors as other
203
217
  while (get_num_levels() < other.get_num_levels()) grow();
@@ -209,128 +223,117 @@ void req_sketch<T, C, S, A>::merge(FwdSk&& other) {
209
223
  update_max_nom_size();
210
224
  update_num_retained();
211
225
  if (num_retained_ >= max_nom_size_) compress();
226
+ reset_sorted_view();
212
227
  }
213
228
 
214
- template<typename T, typename C, typename S, typename A>
215
- const T& req_sketch<T, C, S, A>::get_min_value() const {
216
- if (is_empty()) return get_invalid_value();
217
- return *min_value_;
229
+ template<typename T, typename C, typename A>
230
+ const T& req_sketch<T, C, A>::get_min_item() const {
231
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
232
+ return *min_item_;
218
233
  }
219
234
 
220
- template<typename T, typename C, typename S, typename A>
221
- const T& req_sketch<T, C, S, A>::get_max_value() const {
222
- if (is_empty()) return get_invalid_value();
223
- return *max_value_;
235
+ template<typename T, typename C, typename A>
236
+ const T& req_sketch<T, C, A>::get_max_item() const {
237
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
238
+ return *max_item_;
224
239
  }
225
240
 
226
- template<typename T, typename C, typename S, typename A>
227
- C req_sketch<T, C, S, A>::get_comparator() const {
228
- return C();
241
+ template<typename T, typename C, typename A>
242
+ C req_sketch<T, C, A>::get_comparator() const {
243
+ return comparator_;
229
244
  }
230
245
 
231
- template<typename T, typename C, typename S, typename A>
232
- template<bool inclusive>
233
- double req_sketch<T, C, S, A>::get_rank(const T& item) const {
246
+ template<typename T, typename C, typename A>
247
+ A req_sketch<T, C, A>::get_allocator() const {
248
+ return allocator_;
249
+ }
250
+
251
+ template<typename T, typename C, typename A>
252
+ double req_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
253
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
234
254
  uint64_t weight = 0;
235
255
  for (const auto& compactor: compactors_) {
236
- weight += compactor.template compute_weight<inclusive>(item);
256
+ weight += compactor.compute_weight(item, inclusive);
237
257
  }
238
258
  return static_cast<double>(weight) / n_;
239
259
  }
240
260
 
241
- template<typename T, typename C, typename S, typename A>
242
- template<bool inclusive>
243
- auto req_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const -> vector_double {
244
- auto buckets = get_CDF<inclusive>(split_points, size);
245
- if (is_empty()) return buckets;
246
- for (uint32_t i = size; i > 0; --i) {
247
- buckets[i] -= buckets[i - 1];
248
- }
249
- return buckets;
261
+ template<typename T, typename C, typename A>
262
+ auto req_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
263
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
264
+ setup_sorted_view();
265
+ return sorted_view_->get_PMF(split_points, size, inclusive);
250
266
  }
251
267
 
252
- template<typename T, typename C, typename S, typename A>
253
- template<bool inclusive>
254
- auto req_sketch<T, C, S, A>::get_CDF(const T* split_points, uint32_t size) const -> vector_double {
255
- vector_double buckets(allocator_);
256
- if (is_empty()) return buckets;
257
- check_split_points(split_points, size);
258
- buckets.reserve(size + 1);
259
- for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank<inclusive>(split_points[i]));
260
- buckets.push_back(1);
261
- return buckets;
268
+ template<typename T, typename C, typename A>
269
+ auto req_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
270
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
271
+ setup_sorted_view();
272
+ return sorted_view_->get_CDF(split_points, size, inclusive);
262
273
  }
263
274
 
264
- template<typename T, typename C, typename S, typename A>
265
- template<bool inclusive>
266
- auto req_sketch<T, C, S, A>::get_quantile(double rank) const -> quantile_return_type {
267
- if (is_empty()) return get_invalid_value();
268
- if (rank == 0.0) return *min_value_;
269
- if (rank == 1.0) return *max_value_;
275
+ template<typename T, typename C, typename A>
276
+ auto req_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
277
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
270
278
  if ((rank < 0.0) || (rank > 1.0)) {
271
- throw std::invalid_argument("Rank cannot be less than zero or greater than 1.0");
279
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
272
280
  }
273
281
  // possible side-effect of sorting level zero
274
- return get_sorted_view<inclusive>(true).get_quantile(rank);
282
+ setup_sorted_view();
283
+ return sorted_view_->get_quantile(rank, inclusive);
275
284
  }
276
285
 
277
- template<typename T, typename C, typename S, typename A>
278
- template<bool inclusive>
279
- std::vector<T, A> req_sketch<T, C, S, A>::get_quantiles(const double* ranks, uint32_t size) const {
286
+ template<typename T, typename C, typename A>
287
+ std::vector<T, A> req_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
288
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
280
289
  std::vector<T, A> quantiles(allocator_);
281
- if (is_empty()) return quantiles;
282
290
  quantiles.reserve(size);
283
291
 
284
292
  // possible side-effect of sorting level zero
285
- auto view = get_sorted_view<inclusive>(true);
293
+ setup_sorted_view();
286
294
 
287
295
  for (uint32_t i = 0; i < size; ++i) {
288
296
  const double rank = ranks[i];
289
297
  if ((rank < 0.0) || (rank > 1.0)) {
290
- throw std::invalid_argument("rank cannot be less than zero or greater than 1.0");
291
- }
292
- if (rank == 0.0) quantiles.push_back(*min_value_);
293
- else if (rank == 1.0) quantiles.push_back(*max_value_);
294
- else {
295
- quantiles.push_back(view.get_quantile(rank));
298
+ throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
296
299
  }
300
+ quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
297
301
  }
298
302
  return quantiles;
299
303
  }
300
304
 
301
- template<typename T, typename C, typename S, typename A>
302
- template<bool inclusive>
303
- quantile_sketch_sorted_view<T, C, A> req_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
305
+ template<typename T, typename C, typename A>
306
+ quantiles_sorted_view<T, C, A> req_sketch<T, C, A>::get_sorted_view() const {
304
307
  if (!compactors_[0].is_sorted()) {
305
308
  const_cast<Compactor&>(compactors_[0]).sort(); // allow this side effect
306
309
  }
307
- quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
310
+ quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
308
311
 
309
312
  for (auto& compactor: compactors_) {
310
313
  view.add(compactor.begin(), compactor.end(), 1 << compactor.get_lg_weight());
311
314
  }
312
315
 
313
- if (cumulative) view.template convert_to_cummulative<inclusive>();
316
+ view.convert_to_cummulative();
314
317
  return view;
315
318
  }
316
319
 
317
- template<typename T, typename C, typename S, typename A>
318
- double req_sketch<T, C, S, A>::get_rank_lower_bound(double rank, uint8_t num_std_dev) const {
320
+ template<typename T, typename C, typename A>
321
+ double req_sketch<T, C, A>::get_rank_lower_bound(double rank, uint8_t num_std_dev) const {
319
322
  return get_rank_lb(get_k(), get_num_levels(), rank, num_std_dev, get_n(), hra_);
320
323
  }
321
324
 
322
- template<typename T, typename C, typename S, typename A>
323
- double req_sketch<T, C, S, A>::get_rank_upper_bound(double rank, uint8_t num_std_dev) const {
325
+ template<typename T, typename C, typename A>
326
+ double req_sketch<T, C, A>::get_rank_upper_bound(double rank, uint8_t num_std_dev) const {
324
327
  return get_rank_ub(get_k(), get_num_levels(), rank, num_std_dev, get_n(), hra_);
325
328
  }
326
329
 
327
- template<typename T, typename C, typename S, typename A>
328
- double req_sketch<T, C, S, A>::get_RSE(uint16_t k, double rank, bool hra, uint64_t n) {
330
+ template<typename T, typename C, typename A>
331
+ double req_sketch<T, C, A>::get_RSE(uint16_t k, double rank, bool hra, uint64_t n) {
329
332
  return get_rank_lb(k, 2, rank, 1, n, hra);
330
333
  }
331
334
 
332
- template<typename T, typename C, typename S, typename A>
333
- double req_sketch<T, C, S, A>::get_rank_lb(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
335
+ template<typename T, typename C, typename A>
336
+ double req_sketch<T, C, A>::get_rank_lb(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
334
337
  if (is_exact_rank(k, num_levels, rank, n, hra)) return rank;
335
338
  const double relative = relative_rse_factor() / k * (hra ? 1.0 - rank : rank);
336
339
  const double fixed = FIXED_RSE_FACTOR / k;
@@ -339,8 +342,8 @@ double req_sketch<T, C, S, A>::get_rank_lb(uint16_t k, uint8_t num_levels, doubl
339
342
  return std::max(lb_rel, lb_fix);
340
343
  }
341
344
 
342
- template<typename T, typename C, typename S, typename A>
343
- double req_sketch<T, C, S, A>::get_rank_ub(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
345
+ template<typename T, typename C, typename A>
346
+ double req_sketch<T, C, A>::get_rank_ub(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
344
347
  if (is_exact_rank(k, num_levels, rank, n, hra)) return rank;
345
348
  const double relative = relative_rse_factor() / k * (hra ? 1.0 - rank : rank);
346
349
  const double fixed = FIXED_RSE_FACTOR / k;
@@ -349,23 +352,23 @@ double req_sketch<T, C, S, A>::get_rank_ub(uint16_t k, uint8_t num_levels, doubl
349
352
  return std::min(ub_rel, ub_fix);
350
353
  }
351
354
 
352
- template<typename T, typename C, typename S, typename A>
353
- bool req_sketch<T, C, S, A>::is_exact_rank(uint16_t k, uint8_t num_levels, double rank, uint64_t n, bool hra) {
355
+ template<typename T, typename C, typename A>
356
+ bool req_sketch<T, C, A>::is_exact_rank(uint16_t k, uint8_t num_levels, double rank, uint64_t n, bool hra) {
354
357
  const unsigned base_cap = k * req_constants::INIT_NUM_SECTIONS;
355
358
  if (num_levels == 1 || n <= base_cap) return true;
356
359
  const double exact_rank_thresh = static_cast<double>(base_cap) / n;
357
360
  return (hra && rank >= 1.0 - exact_rank_thresh) || (!hra && rank <= exact_rank_thresh);
358
361
  }
359
362
 
360
- template<typename T, typename C, typename S, typename A>
361
- double req_sketch<T, C, S, A>::relative_rse_factor() {
363
+ template<typename T, typename C, typename A>
364
+ double req_sketch<T, C, A>::relative_rse_factor() {
362
365
  return sqrt(0.0512 / req_constants::INIT_NUM_SECTIONS);
363
366
  }
364
367
 
365
368
  // implementation for fixed-size arithmetic types (integral and floating point)
366
- template<typename T, typename C, typename S, typename A>
369
+ template<typename T, typename C, typename A>
367
370
  template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
368
- size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
371
+ size_t req_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
369
372
  size_t size = PREAMBLE_SIZE_BYTES;
370
373
  if (is_empty()) return size;
371
374
  if (is_estimation_mode()) {
@@ -380,15 +383,15 @@ size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const
380
383
  }
381
384
 
382
385
  // implementation for all other types
383
- template<typename T, typename C, typename S, typename A>
386
+ template<typename T, typename C, typename A>
384
387
  template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
385
- size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
388
+ size_t req_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
386
389
  size_t size = PREAMBLE_SIZE_BYTES;
387
390
  if (is_empty()) return size;
388
391
  if (is_estimation_mode()) {
389
392
  size += sizeof(n_);
390
- size += sd.size_of_item(*min_value_);
391
- size += sd.size_of_item(*max_value_);
393
+ size += sd.size_of_item(*min_item_);
394
+ size += sd.size_of_item(*max_item_);
392
395
  }
393
396
  if (n_ == 1) {
394
397
  size += sd.size_of_item(*compactors_[0].begin());
@@ -398,9 +401,9 @@ size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const
398
401
  return size;
399
402
  }
400
403
 
401
- template<typename T, typename C, typename S, typename A>
404
+ template<typename T, typename C, typename A>
402
405
  template<typename SerDe>
403
- void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
406
+ void req_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
404
407
  const uint8_t preamble_ints = is_estimation_mode() ? 4 : 2;
405
408
  write(os, preamble_ints);
406
409
  const uint8_t serial_version = SERIAL_VERSION;
@@ -423,8 +426,8 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
423
426
  if (is_empty()) return;
424
427
  if (is_estimation_mode()) {
425
428
  write(os, n_);
426
- sd.serialize(os, min_value_, 1);
427
- sd.serialize(os, max_value_, 1);
429
+ sd.serialize(os, min_item_, 1);
430
+ sd.serialize(os, max_item_, 1);
428
431
  }
429
432
  if (raw_items) {
430
433
  sd.serialize(os, compactors_[0].begin(), num_raw_items);
@@ -433,9 +436,9 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
433
436
  }
434
437
  }
435
438
 
436
- template<typename T, typename C, typename S, typename A>
439
+ template<typename T, typename C, typename A>
437
440
  template<typename SerDe>
438
- auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
441
+ auto req_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
439
442
  const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
440
443
  vector_bytes bytes(size, 0, allocator_);
441
444
  uint8_t* ptr = bytes.data() + header_size_bytes;
@@ -463,8 +466,8 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe&
463
466
  if (!is_empty()) {
464
467
  if (is_estimation_mode()) {
465
468
  ptr += copy_to_mem(n_, ptr);
466
- ptr += sd.serialize(ptr, end_ptr - ptr, min_value_, 1);
467
- ptr += sd.serialize(ptr, end_ptr - ptr, max_value_, 1);
469
+ ptr += sd.serialize(ptr, end_ptr - ptr, min_item_, 1);
470
+ ptr += sd.serialize(ptr, end_ptr - ptr, max_item_, 1);
468
471
  }
469
472
  if (raw_items) {
470
473
  ptr += sd.serialize(ptr, end_ptr - ptr, compactors_[0].begin(), num_raw_items);
@@ -475,14 +478,9 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe&
475
478
  return bytes;
476
479
  }
477
480
 
478
- template<typename T, typename C, typename S, typename A>
479
- req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
480
- return deserialize(is, S(), allocator);
481
- }
482
-
483
- template<typename T, typename C, typename S, typename A>
481
+ template<typename T, typename C, typename A>
484
482
  template<typename SerDe>
485
- req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
483
+ req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(std::istream& is, const SerDe& sd, const C& comparator, const A& allocator) {
486
484
  const auto preamble_ints = read<uint8_t>(is);
487
485
  const auto serial_version = read<uint8_t>(is);
488
486
  const auto family_id = read<uint8_t>(is);
@@ -498,14 +496,14 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
498
496
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
499
497
  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
500
498
  const bool hra = flags_byte & (1 << flags::IS_HIGH_RANK);
501
- if (is_empty) return req_sketch(k, hra, allocator);
499
+ if (is_empty) return req_sketch(k, hra, comparator, allocator);
502
500
 
503
501
  A alloc(allocator);
504
502
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
505
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
506
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
507
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
508
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
503
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
504
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
505
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
506
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
509
507
 
510
508
  const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
511
509
  const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
@@ -514,19 +512,19 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
514
512
  uint64_t n = 1;
515
513
  if (num_levels > 1) {
516
514
  n = read<uint64_t>(is);
517
- sd.deserialize(is, min_value_buffer.get(), 1);
515
+ sd.deserialize(is, min_item_buffer.get(), 1);
518
516
  // serde call did not throw, repackage with destrtuctor
519
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
520
- sd.deserialize(is, max_value_buffer.get(), 1);
517
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
518
+ sd.deserialize(is, max_item_buffer.get(), 1);
521
519
  // serde call did not throw, repackage with destrtuctor
522
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
520
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
523
521
  }
524
522
 
525
523
  if (raw_items) {
526
- compactors.push_back(Compactor::deserialize(is, sd, allocator, is_level_0_sorted, k, num_raw_items, hra));
524
+ compactors.push_back(Compactor::deserialize(is, sd, comparator, allocator, is_level_0_sorted, k, num_raw_items, hra));
527
525
  } else {
528
526
  for (size_t i = 0; i < num_levels; ++i) {
529
- compactors.push_back(Compactor::deserialize(is, sd, allocator, i == 0 ? is_level_0_sorted : true, hra));
527
+ compactors.push_back(Compactor::deserialize(is, sd, comparator, allocator, i == 0 ? is_level_0_sorted : true, hra));
530
528
  }
531
529
  }
532
530
  if (num_levels == 1) {
@@ -536,29 +534,24 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
536
534
  auto min_it = begin;
537
535
  auto max_it = begin;
538
536
  for (auto it = begin; it != end; ++it) {
539
- if (C()(*it, *min_it)) min_it = it;
540
- if (C()(*max_it, *it)) max_it = it;
537
+ if (comparator(*it, *min_it)) min_it = it;
538
+ if (comparator(*max_it, *it)) max_it = it;
541
539
  }
542
- new (min_value_buffer.get()) T(*min_it);
540
+ new (min_item_buffer.get()) T(*min_it);
543
541
  // copy did not throw, repackage with destrtuctor
544
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
545
- new (max_value_buffer.get()) T(*max_it);
542
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
543
+ new (max_item_buffer.get()) T(*max_it);
546
544
  // copy did not throw, repackage with destrtuctor
547
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
545
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
548
546
  }
549
547
 
550
548
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
551
- return req_sketch(k, hra, n, std::move(min_value), std::move(max_value), std::move(compactors));
552
- }
553
-
554
- template<typename T, typename C, typename S, typename A>
555
- req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
556
- return deserialize(bytes, size, S(), allocator);
549
+ return req_sketch(k, hra, n, std::move(min_item), std::move(max_item), std::move(compactors), comparator);
557
550
  }
558
551
 
559
- template<typename T, typename C, typename S, typename A>
552
+ template<typename T, typename C, typename A>
560
553
  template<typename SerDe>
561
- req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
554
+ req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const C& comparator, const A& allocator) {
562
555
  ensure_minimum_memory(size, 8);
563
556
  const char* ptr = static_cast<const char*>(bytes);
564
557
  const char* end_ptr = static_cast<const char*>(bytes) + size;
@@ -584,14 +577,14 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
584
577
 
585
578
  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
586
579
  const bool hra = flags_byte & (1 << flags::IS_HIGH_RANK);
587
- if (is_empty) return req_sketch(k, hra, allocator);
580
+ if (is_empty) return req_sketch(k, hra, comparator, allocator);
588
581
 
589
582
  A alloc(allocator);
590
583
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
591
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
592
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
593
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
594
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
584
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
585
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
586
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
587
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
595
588
 
596
589
  const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
597
590
  const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
@@ -601,21 +594,21 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
601
594
  if (num_levels > 1) {
602
595
  ensure_minimum_memory(end_ptr - ptr, sizeof(n));
603
596
  ptr += copy_from_mem(ptr, n);
604
- ptr += sd.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
597
+ ptr += sd.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
605
598
  // serde call did not throw, repackage with destrtuctor
606
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
607
- ptr += sd.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
599
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
600
+ ptr += sd.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
608
601
  // serde call did not throw, repackage with destrtuctor
609
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
602
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
610
603
  }
611
604
 
612
605
  if (raw_items) {
613
- auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, allocator, is_level_0_sorted, k, num_raw_items, hra);
606
+ auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, comparator, allocator, is_level_0_sorted, k, num_raw_items, hra);
614
607
  compactors.push_back(std::move(pair.first));
615
608
  ptr += pair.second;
616
609
  } else {
617
610
  for (size_t i = 0; i < num_levels; ++i) {
618
- auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, allocator, i == 0 ? is_level_0_sorted : true, hra);
611
+ auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, comparator, allocator, i == 0 ? is_level_0_sorted : true, hra);
619
612
  compactors.push_back(std::move(pair.first));
620
613
  ptr += pair.second;
621
614
  }
@@ -627,46 +620,46 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
627
620
  auto min_it = begin;
628
621
  auto max_it = begin;
629
622
  for (auto it = begin; it != end; ++it) {
630
- if (C()(*it, *min_it)) min_it = it;
631
- if (C()(*max_it, *it)) max_it = it;
623
+ if (comparator(*it, *min_it)) min_it = it;
624
+ if (comparator(*max_it, *it)) max_it = it;
632
625
  }
633
- new (min_value_buffer.get()) T(*min_it);
626
+ new (min_item_buffer.get()) T(*min_it);
634
627
  // copy did not throw, repackage with destrtuctor
635
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
636
- new (max_value_buffer.get()) T(*max_it);
628
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
629
+ new (max_item_buffer.get()) T(*max_it);
637
630
  // copy did not throw, repackage with destrtuctor
638
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
631
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
639
632
  }
640
633
 
641
- return req_sketch(k, hra, n, std::move(min_value), std::move(max_value), std::move(compactors));
634
+ return req_sketch(k, hra, n, std::move(min_item), std::move(max_item), std::move(compactors), comparator);
642
635
  }
643
636
 
644
- template<typename T, typename C, typename S, typename A>
645
- void req_sketch<T, C, S, A>::grow() {
637
+ template<typename T, typename C, typename A>
638
+ void req_sketch<T, C, A>::grow() {
646
639
  const uint8_t lg_weight = get_num_levels();
647
- compactors_.push_back(Compactor(hra_, lg_weight, k_, allocator_));
640
+ compactors_.push_back(Compactor(hra_, lg_weight, k_, comparator_, allocator_));
648
641
  update_max_nom_size();
649
642
  }
650
643
 
651
- template<typename T, typename C, typename S, typename A>
652
- uint8_t req_sketch<T, C, S, A>::get_num_levels() const {
644
+ template<typename T, typename C, typename A>
645
+ uint8_t req_sketch<T, C, A>::get_num_levels() const {
653
646
  return static_cast<uint8_t>(compactors_.size());
654
647
  }
655
648
 
656
- template<typename T, typename C, typename S, typename A>
657
- void req_sketch<T, C, S, A>::update_max_nom_size() {
649
+ template<typename T, typename C, typename A>
650
+ void req_sketch<T, C, A>::update_max_nom_size() {
658
651
  max_nom_size_ = 0;
659
652
  for (const auto& compactor: compactors_) max_nom_size_ += compactor.get_nom_capacity();
660
653
  }
661
654
 
662
- template<typename T, typename C, typename S, typename A>
663
- void req_sketch<T, C, S, A>::update_num_retained() {
655
+ template<typename T, typename C, typename A>
656
+ void req_sketch<T, C, A>::update_num_retained() {
664
657
  num_retained_ = 0;
665
658
  for (const auto& compactor: compactors_) num_retained_ += compactor.get_num_items();
666
659
  }
667
660
 
668
- template<typename T, typename C, typename S, typename A>
669
- void req_sketch<T, C, S, A>::compress() {
661
+ template<typename T, typename C, typename A>
662
+ void req_sketch<T, C, A>::compress() {
670
663
  for (size_t h = 0; h < compactors_.size(); ++h) {
671
664
  if (compactors_[h].get_num_items() >= compactors_[h].get_nom_capacity()) {
672
665
  if (h == 0) compactors_[0].sort();
@@ -681,8 +674,8 @@ void req_sketch<T, C, S, A>::compress() {
681
674
  }
682
675
  }
683
676
 
684
- template<typename T, typename C, typename S, typename A>
685
- string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
677
+ template<typename T, typename C, typename A>
678
+ string<A> req_sketch<T, C, A>::to_string(bool print_levels, bool print_items) const {
686
679
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
687
680
  // The stream does not support passing an allocator instance, and alternatives are complicated.
688
681
  std::ostringstream os;
@@ -697,8 +690,8 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
697
690
  os << " Retained items : " << num_retained_ << std::endl;
698
691
  os << " Capacity items : " << max_nom_size_ << std::endl;
699
692
  if (!is_empty()) {
700
- os << " Min value : " << *min_value_ << std::endl;
701
- os << " Max value : " << *max_value_ << std::endl;
693
+ os << " Min item : " << *min_item_ << std::endl;
694
+ os << " Max item : " << *max_item_ << std::endl;
702
695
  }
703
696
  os << "### End sketch summary" << std::endl;
704
697
 
@@ -728,8 +721,8 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
728
721
  return string<A>(os.str().c_str(), allocator_);
729
722
  }
730
723
 
731
- template<typename T, typename C, typename S, typename A>
732
- class req_sketch<T, C, S, A>::item_deleter {
724
+ template<typename T, typename C, typename A>
725
+ class req_sketch<T, C, A>::item_deleter {
733
726
  public:
734
727
  item_deleter(const A& allocator): allocator_(allocator) {}
735
728
  void operator() (T* ptr) {
@@ -742,8 +735,11 @@ class req_sketch<T, C, S, A>::item_deleter {
742
735
  A allocator_;
743
736
  };
744
737
 
745
- template<typename T, typename C, typename S, typename A>
746
- req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors):
738
+ template<typename T, typename C, typename A>
739
+ req_sketch<T, C, A>::req_sketch(uint16_t k, bool hra, uint64_t n,
740
+ std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
741
+ std::vector<Compactor, AllocCompactor>&& compactors, const C& comparator):
742
+ comparator_(comparator),
747
743
  allocator_(compactors.get_allocator()),
748
744
  k_(k),
749
745
  hra_(hra),
@@ -751,15 +747,16 @@ max_nom_size_(0),
751
747
  num_retained_(0),
752
748
  n_(n),
753
749
  compactors_(std::move(compactors)),
754
- min_value_(min_value.release()),
755
- max_value_(max_value.release())
750
+ min_item_(min_item.release()),
751
+ max_item_(max_item.release()),
752
+ sorted_view_(nullptr)
756
753
  {
757
754
  update_max_nom_size();
758
755
  update_num_retained();
759
756
  }
760
757
 
761
- template<typename T, typename C, typename S, typename A>
762
- void req_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels) {
758
+ template<typename T, typename C, typename A>
759
+ void req_sketch<T, C, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels) {
763
760
  const uint8_t expected_preamble_ints = num_levels > 1 ? 4 : 2;
764
761
  if (preamble_ints != expected_preamble_ints) {
765
762
  throw std::invalid_argument("Possible corruption: preamble ints must be "
@@ -767,8 +764,8 @@ void req_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t
767
764
  }
768
765
  }
769
766
 
770
- template<typename T, typename C, typename S, typename A>
771
- void req_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
767
+ template<typename T, typename C, typename A>
768
+ void req_sketch<T, C, A>::check_serial_version(uint8_t serial_version) {
772
769
  if (serial_version != SERIAL_VERSION) {
773
770
  throw std::invalid_argument("Possible corruption: serial version mismatch: expected "
774
771
  + std::to_string(SERIAL_VERSION)
@@ -776,35 +773,53 @@ void req_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
776
773
  }
777
774
  }
778
775
 
779
- template<typename T, typename C, typename S, typename A>
780
- void req_sketch<T, C, S, A>::check_family_id(uint8_t family_id) {
776
+ template<typename T, typename C, typename A>
777
+ void req_sketch<T, C, A>::check_family_id(uint8_t family_id) {
781
778
  if (family_id != FAMILY) {
782
779
  throw std::invalid_argument("Possible corruption: family mismatch: expected "
783
780
  + std::to_string(FAMILY) + ", got " + std::to_string(family_id));
784
781
  }
785
782
  }
786
783
 
787
- template<typename T, typename C, typename S, typename A>
788
- auto req_sketch<T, C, S, A>::begin() const -> const_iterator {
784
+ template<typename T, typename C, typename A>
785
+ auto req_sketch<T, C, A>::begin() const -> const_iterator {
789
786
  return const_iterator(compactors_.begin(), compactors_.end());
790
787
  }
791
788
 
792
- template<typename T, typename C, typename S, typename A>
793
- auto req_sketch<T, C, S, A>::end() const -> const_iterator {
789
+ template<typename T, typename C, typename A>
790
+ auto req_sketch<T, C, A>::end() const -> const_iterator {
794
791
  return const_iterator(compactors_.end(), compactors_.end());
795
792
  }
796
793
 
794
+ template<typename T, typename C, typename A>
795
+ void req_sketch<T, C, A>::setup_sorted_view() const {
796
+ if (sorted_view_ == nullptr) {
797
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
798
+ sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
799
+ }
800
+ }
801
+
802
+ template<typename T, typename C, typename A>
803
+ void req_sketch<T, C, A>::reset_sorted_view() {
804
+ if (sorted_view_ != nullptr) {
805
+ sorted_view_->~quantiles_sorted_view();
806
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
807
+ AllocSortedView(allocator_).deallocate(sorted_view_, 1);
808
+ sorted_view_ = nullptr;
809
+ }
810
+ }
811
+
797
812
  // iterator
798
813
 
799
- template<typename T, typename C, typename S, typename A>
800
- req_sketch<T, C, S, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
814
+ template<typename T, typename C, typename A>
815
+ req_sketch<T, C, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
801
816
  levels_it_(begin),
802
817
  levels_end_(end),
803
818
  compactor_it_(begin == end ? nullptr : (*levels_it_).begin())
804
819
  {}
805
820
 
806
- template<typename T, typename C, typename S, typename A>
807
- auto req_sketch<T, C, S, A>::const_iterator::operator++() -> const_iterator& {
821
+ template<typename T, typename C, typename A>
822
+ auto req_sketch<T, C, A>::const_iterator::operator++() -> const_iterator& {
808
823
  ++compactor_it_;
809
824
  if (compactor_it_ == (*levels_it_).end()) {
810
825
  ++levels_it_;
@@ -813,28 +828,33 @@ auto req_sketch<T, C, S, A>::const_iterator::operator++() -> const_iterator& {
813
828
  return *this;
814
829
  }
815
830
 
816
- template<typename T, typename C, typename S, typename A>
817
- auto req_sketch<T, C, S, A>::const_iterator::operator++(int) -> const_iterator& {
831
+ template<typename T, typename C, typename A>
832
+ auto req_sketch<T, C, A>::const_iterator::operator++(int) -> const_iterator& {
818
833
  const_iterator tmp(*this);
819
834
  operator++();
820
835
  return tmp;
821
836
  }
822
837
 
823
- template<typename T, typename C, typename S, typename A>
824
- bool req_sketch<T, C, S, A>::const_iterator::operator==(const const_iterator& other) const {
838
+ template<typename T, typename C, typename A>
839
+ bool req_sketch<T, C, A>::const_iterator::operator==(const const_iterator& other) const {
825
840
  if (levels_it_ != other.levels_it_) return false;
826
841
  if (levels_it_ == levels_end_) return true;
827
842
  return compactor_it_ == other.compactor_it_;
828
843
  }
829
844
 
830
- template<typename T, typename C, typename S, typename A>
831
- bool req_sketch<T, C, S, A>::const_iterator::operator!=(const const_iterator& other) const {
845
+ template<typename T, typename C, typename A>
846
+ bool req_sketch<T, C, A>::const_iterator::operator!=(const const_iterator& other) const {
832
847
  return !operator==(other);
833
848
  }
834
849
 
835
- template<typename T, typename C, typename S, typename A>
836
- std::pair<const T&, const uint64_t> req_sketch<T, C, S, A>::const_iterator::operator*() const {
837
- return std::pair<const T&, const uint64_t>(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
850
+ template<typename T, typename C, typename A>
851
+ auto req_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
852
+ return value_type(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
853
+ }
854
+
855
+ template<typename T, typename C, typename A>
856
+ auto req_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
857
+ return **this;
838
858
  }
839
859
 
840
860
  } /* namespace datasketches */