datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -32,20 +32,22 @@
32
32
 
33
33
  namespace datasketches {
34
34
 
35
- template<typename T, typename C, typename S, typename A>
36
- kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, const A& allocator):
35
+ template<typename T, typename C, typename A>
36
+ kll_sketch<T, C, A>::kll_sketch(uint16_t k, const C& comparator, const A& allocator):
37
+ comparator_(comparator),
37
38
  allocator_(allocator),
38
39
  k_(k),
39
40
  m_(DEFAULT_M),
40
41
  min_k_(k),
41
- n_(0),
42
42
  num_levels_(1),
43
+ is_level_zero_sorted_(false),
44
+ n_(0),
43
45
  levels_(2, 0, allocator),
44
46
  items_(nullptr),
45
47
  items_size_(k_),
46
- min_value_(nullptr),
47
- max_value_(nullptr),
48
- is_level_zero_sorted_(false)
48
+ min_item_(nullptr),
49
+ max_item_(nullptr),
50
+ sorted_view_(nullptr)
49
51
  {
50
52
  if (k < MIN_K || k > MAX_K) {
51
53
  throw std::invalid_argument("K must be >= " + std::to_string(MIN_K) + " and <= " + std::to_string(MAX_K) + ": " + std::to_string(k));
@@ -54,115 +56,126 @@ is_level_zero_sorted_(false)
54
56
  items_ = allocator_.allocate(items_size_);
55
57
  }
56
58
 
57
- template<typename T, typename C, typename S, typename A>
58
- kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch& other):
59
+ template<typename T, typename C, typename A>
60
+ kll_sketch<T, C, A>::kll_sketch(const kll_sketch& other):
61
+ comparator_(other.comparator_),
59
62
  allocator_(other.allocator_),
60
63
  k_(other.k_),
61
64
  m_(other.m_),
62
65
  min_k_(other.min_k_),
63
- n_(other.n_),
64
66
  num_levels_(other.num_levels_),
67
+ is_level_zero_sorted_(other.is_level_zero_sorted_),
68
+ n_(other.n_),
65
69
  levels_(other.levels_),
66
70
  items_(nullptr),
67
71
  items_size_(other.items_size_),
68
- min_value_(nullptr),
69
- max_value_(nullptr),
70
- is_level_zero_sorted_(other.is_level_zero_sorted_)
72
+ min_item_(nullptr),
73
+ max_item_(nullptr),
74
+ sorted_view_(nullptr)
71
75
  {
72
76
  items_ = allocator_.allocate(items_size_);
73
77
  for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
74
- if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
75
- if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
78
+ if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
79
+ if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
76
80
  }
77
81
 
78
- template<typename T, typename C, typename S, typename A>
79
- kll_sketch<T, C, S, A>::kll_sketch(kll_sketch&& other) noexcept:
82
+ template<typename T, typename C, typename A>
83
+ kll_sketch<T, C, A>::kll_sketch(kll_sketch&& other) noexcept:
84
+ comparator_(std::move(other.comparator_)),
80
85
  allocator_(std::move(other.allocator_)),
81
86
  k_(other.k_),
82
87
  m_(other.m_),
83
88
  min_k_(other.min_k_),
84
- n_(other.n_),
85
89
  num_levels_(other.num_levels_),
90
+ is_level_zero_sorted_(other.is_level_zero_sorted_),
91
+ n_(other.n_),
86
92
  levels_(std::move(other.levels_)),
87
93
  items_(other.items_),
88
94
  items_size_(other.items_size_),
89
- min_value_(other.min_value_),
90
- max_value_(other.max_value_),
91
- is_level_zero_sorted_(other.is_level_zero_sorted_)
95
+ min_item_(other.min_item_),
96
+ max_item_(other.max_item_),
97
+ sorted_view_(nullptr)
92
98
  {
93
99
  other.items_ = nullptr;
94
- other.min_value_ = nullptr;
95
- other.max_value_ = nullptr;
100
+ other.min_item_ = nullptr;
101
+ other.max_item_ = nullptr;
96
102
  }
97
103
 
98
- template<typename T, typename C, typename S, typename A>
99
- kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(const kll_sketch& other) {
100
- kll_sketch<T, C, S, A> copy(other);
104
+ template<typename T, typename C, typename A>
105
+ kll_sketch<T, C, A>& kll_sketch<T, C, A>::operator=(const kll_sketch& other) {
106
+ kll_sketch copy(other);
107
+ std::swap(comparator_, copy.comparator_);
101
108
  std::swap(allocator_, copy.allocator_);
102
109
  std::swap(k_, copy.k_);
103
110
  std::swap(m_, copy.m_);
104
111
  std::swap(min_k_, copy.min_k_);
105
- std::swap(n_, copy.n_);
106
112
  std::swap(num_levels_, copy.num_levels_);
113
+ std::swap(is_level_zero_sorted_, copy.is_level_zero_sorted_);
114
+ std::swap(n_, copy.n_);
107
115
  std::swap(levels_, copy.levels_);
108
116
  std::swap(items_, copy.items_);
109
117
  std::swap(items_size_, copy.items_size_);
110
- std::swap(min_value_, copy.min_value_);
111
- std::swap(max_value_, copy.max_value_);
112
- std::swap(is_level_zero_sorted_, copy.is_level_zero_sorted_);
118
+ std::swap(min_item_, copy.min_item_);
119
+ std::swap(max_item_, copy.max_item_);
120
+ reset_sorted_view();
113
121
  return *this;
114
122
  }
115
123
 
116
- template<typename T, typename C, typename S, typename A>
117
- kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(kll_sketch&& other) {
124
+ template<typename T, typename C, typename A>
125
+ kll_sketch<T, C, A>& kll_sketch<T, C, A>::operator=(kll_sketch&& other) {
126
+ std::swap(comparator_, other.comparator_);
118
127
  std::swap(allocator_, other.allocator_);
119
128
  std::swap(k_, other.k_);
120
129
  std::swap(m_, other.m_);
121
130
  std::swap(min_k_, other.min_k_);
122
- std::swap(n_, other.n_);
123
131
  std::swap(num_levels_, other.num_levels_);
132
+ std::swap(is_level_zero_sorted_, other.is_level_zero_sorted_);
133
+ std::swap(n_, other.n_);
124
134
  std::swap(levels_, other.levels_);
125
135
  std::swap(items_, other.items_);
126
136
  std::swap(items_size_, other.items_size_);
127
- std::swap(min_value_, other.min_value_);
128
- std::swap(max_value_, other.max_value_);
129
- std::swap(is_level_zero_sorted_, other.is_level_zero_sorted_);
137
+ std::swap(min_item_, other.min_item_);
138
+ std::swap(max_item_, other.max_item_);
139
+ reset_sorted_view();
130
140
  return *this;
131
141
  }
132
142
 
133
- template<typename T, typename C, typename S, typename A>
134
- kll_sketch<T, C, S, A>::~kll_sketch() {
143
+ template<typename T, typename C, typename A>
144
+ kll_sketch<T, C, A>::~kll_sketch() {
135
145
  if (items_ != nullptr) {
136
146
  const uint32_t begin = levels_[0];
137
147
  const uint32_t end = levels_[num_levels_];
138
148
  for (uint32_t i = begin; i < end; i++) items_[i].~T();
139
149
  allocator_.deallocate(items_, items_size_);
140
150
  }
141
- if (min_value_ != nullptr) {
142
- min_value_->~T();
143
- allocator_.deallocate(min_value_, 1);
151
+ if (min_item_ != nullptr) {
152
+ min_item_->~T();
153
+ allocator_.deallocate(min_item_, 1);
144
154
  }
145
- if (max_value_ != nullptr) {
146
- max_value_->~T();
147
- allocator_.deallocate(max_value_, 1);
155
+ if (max_item_ != nullptr) {
156
+ max_item_->~T();
157
+ allocator_.deallocate(max_item_, 1);
148
158
  }
159
+ reset_sorted_view();
149
160
  }
150
161
 
151
- template<typename T, typename C, typename S, typename A>
152
- template<typename TT, typename CC, typename SS, typename AA>
153
- kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator):
162
+ template<typename T, typename C, typename A>
163
+ template<typename TT, typename CC, typename AA>
164
+ kll_sketch<T, C, A>::kll_sketch(const kll_sketch<TT, CC, AA>& other, const C& comparator, const A& allocator):
165
+ comparator_(comparator),
154
166
  allocator_(allocator),
155
167
  k_(other.k_),
156
168
  m_(other.m_),
157
169
  min_k_(other.min_k_),
158
- n_(other.n_),
159
170
  num_levels_(other.num_levels_),
171
+ is_level_zero_sorted_(other.is_level_zero_sorted_),
172
+ n_(other.n_),
160
173
  levels_(other.levels_, allocator_),
161
174
  items_(nullptr),
162
175
  items_size_(other.items_size_),
163
- min_value_(nullptr),
164
- max_value_(nullptr),
165
- is_level_zero_sorted_(other.is_level_zero_sorted_)
176
+ min_item_(nullptr),
177
+ max_item_(nullptr),
178
+ sorted_view_(nullptr)
166
179
  {
167
180
  static_assert(
168
181
  std::is_constructible<T, TT>::value,
@@ -170,52 +183,53 @@ is_level_zero_sorted_(other.is_level_zero_sorted_)
170
183
  );
171
184
  items_ = allocator_.allocate(items_size_);
172
185
  for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
173
- if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
174
- if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
186
+ if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
187
+ if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
175
188
  check_sorting();
176
189
  }
177
190
 
178
- template<typename T, typename C, typename S, typename A>
191
+ template<typename T, typename C, typename A>
179
192
  template<typename FwdT>
180
- void kll_sketch<T, C, S, A>::update(FwdT&& value) {
181
- if (!check_update_value(value)) { return; }
182
- update_min_max(value);
193
+ void kll_sketch<T, C, A>::update(FwdT&& item) {
194
+ if (!check_update_item(item)) { return; }
195
+ update_min_max(item);
183
196
  const uint32_t index = internal_update();
184
- new (&items_[index]) T(std::forward<FwdT>(value));
197
+ new (&items_[index]) T(std::forward<FwdT>(item));
198
+ reset_sorted_view();
185
199
  }
186
200
 
187
- template<typename T, typename C, typename S, typename A>
188
- void kll_sketch<T, C, S, A>::update_min_max(const T& value) {
201
+ template<typename T, typename C, typename A>
202
+ void kll_sketch<T, C, A>::update_min_max(const T& item) {
189
203
  if (is_empty()) {
190
- min_value_ = new (allocator_.allocate(1)) T(value);
191
- max_value_ = new (allocator_.allocate(1)) T(value);
204
+ min_item_ = new (allocator_.allocate(1)) T(item);
205
+ max_item_ = new (allocator_.allocate(1)) T(item);
192
206
  } else {
193
- if (C()(value, *min_value_)) *min_value_ = value;
194
- if (C()(*max_value_, value)) *max_value_ = value;
207
+ if (comparator_(item, *min_item_)) *min_item_ = item;
208
+ if (comparator_(*max_item_, item)) *max_item_ = item;
195
209
  }
196
210
  }
197
211
 
198
- template<typename T, typename C, typename S, typename A>
199
- uint32_t kll_sketch<T, C, S, A>::internal_update() {
212
+ template<typename T, typename C, typename A>
213
+ uint32_t kll_sketch<T, C, A>::internal_update() {
200
214
  if (levels_[0] == 0) compress_while_updating();
201
215
  n_++;
202
216
  is_level_zero_sorted_ = false;
203
217
  return --levels_[0];
204
218
  }
205
219
 
206
- template<typename T, typename C, typename S, typename A>
220
+ template<typename T, typename C, typename A>
207
221
  template<typename FwdSk>
208
- void kll_sketch<T, C, S, A>::merge(FwdSk&& other) {
222
+ void kll_sketch<T, C, A>::merge(FwdSk&& other) {
209
223
  if (other.is_empty()) return;
210
224
  if (m_ != other.m_) {
211
225
  throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
212
226
  }
213
227
  if (is_empty()) {
214
- min_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_value_));
215
- max_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_value_));
228
+ min_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_item_));
229
+ max_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_item_));
216
230
  } else {
217
- if (C()(*other.min_value_, *min_value_)) *min_value_ = conditional_forward<FwdSk>(*other.min_value_);
218
- if (C()(*max_value_, *other.max_value_)) *max_value_ = conditional_forward<FwdSk>(*other.max_value_);
231
+ if (comparator_(*other.min_item_, *min_item_)) *min_item_ = conditional_forward<FwdSk>(*other.min_item_);
232
+ if (comparator_(*max_item_, *other.max_item_)) *max_item_ = conditional_forward<FwdSk>(*other.max_item_);
219
233
  }
220
234
  const uint64_t final_n = n_ + other.n_;
221
235
  for (uint32_t i = other.levels_[0]; i < other.levels_[1]; i++) {
@@ -226,149 +240,133 @@ void kll_sketch<T, C, S, A>::merge(FwdSk&& other) {
226
240
  n_ = final_n;
227
241
  if (other.is_estimation_mode()) min_k_ = std::min(min_k_, other.min_k_);
228
242
  assert_correct_total_weight();
243
+ reset_sorted_view();
229
244
  }
230
245
 
231
- template<typename T, typename C, typename S, typename A>
232
- bool kll_sketch<T, C, S, A>::is_empty() const {
246
+ template<typename T, typename C, typename A>
247
+ bool kll_sketch<T, C, A>::is_empty() const {
233
248
  return n_ == 0;
234
249
  }
235
250
 
236
- template<typename T, typename C, typename S, typename A>
237
- uint16_t kll_sketch<T, C, S, A>::get_k() const {
251
+ template<typename T, typename C, typename A>
252
+ uint16_t kll_sketch<T, C, A>::get_k() const {
238
253
  return k_;
239
254
  }
240
255
 
241
- template<typename T, typename C, typename S, typename A>
242
- uint64_t kll_sketch<T, C, S, A>::get_n() const {
256
+ template<typename T, typename C, typename A>
257
+ uint64_t kll_sketch<T, C, A>::get_n() const {
243
258
  return n_;
244
259
  }
245
260
 
246
- template<typename T, typename C, typename S, typename A>
247
- uint32_t kll_sketch<T, C, S, A>::get_num_retained() const {
261
+ template<typename T, typename C, typename A>
262
+ uint32_t kll_sketch<T, C, A>::get_num_retained() const {
248
263
  return levels_[num_levels_] - levels_[0];
249
264
  }
250
265
 
251
- template<typename T, typename C, typename S, typename A>
252
- bool kll_sketch<T, C, S, A>::is_estimation_mode() const {
266
+ template<typename T, typename C, typename A>
267
+ bool kll_sketch<T, C, A>::is_estimation_mode() const {
253
268
  return num_levels_ > 1;
254
269
  }
255
270
 
256
- template<typename T, typename C, typename S, typename A>
257
- T kll_sketch<T, C, S, A>::get_min_value() const {
258
- if (is_empty()) return get_invalid_value();
259
- return *min_value_;
271
+ template<typename T, typename C, typename A>
272
+ T kll_sketch<T, C, A>::get_min_item() const {
273
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
274
+ return *min_item_;
260
275
  }
261
276
 
262
- template<typename T, typename C, typename S, typename A>
263
- T kll_sketch<T, C, S, A>::get_max_value() const {
264
- if (is_empty()) return get_invalid_value();
265
- return *max_value_;
277
+ template<typename T, typename C, typename A>
278
+ T kll_sketch<T, C, A>::get_max_item() const {
279
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
280
+ return *max_item_;
266
281
  }
267
282
 
268
- template<typename T, typename C, typename S, typename A>
269
- C kll_sketch<T, C, S, A>::get_comparator() const {
270
- return C();
283
+ template<typename T, typename C, typename A>
284
+ C kll_sketch<T, C, A>::get_comparator() const {
285
+ return comparator_;
271
286
  }
272
287
 
273
- template<typename T, typename C, typename S, typename A>
274
- template<bool inclusive>
275
- auto kll_sketch<T, C, S, A>::get_quantile(double rank) const -> quantile_return_type {
276
- if (is_empty()) return get_invalid_value();
277
- if (rank == 0.0) return *min_value_;
278
- if (rank == 1.0) return *max_value_;
288
+ template<typename T, typename C, typename A>
289
+ A kll_sketch<T, C, A>::get_allocator() const {
290
+ return allocator_;
291
+ }
292
+
293
+ template<typename T, typename C, typename A>
294
+ double kll_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
295
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
296
+ setup_sorted_view();
297
+ return sorted_view_->get_rank(item, inclusive);
298
+ }
299
+
300
+ template<typename T, typename C, typename A>
301
+ auto kll_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
302
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
303
+ setup_sorted_view();
304
+ return sorted_view_->get_PMF(split_points, size, inclusive);
305
+ }
306
+
307
+ template<typename T, typename C, typename A>
308
+ auto kll_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
309
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
310
+ setup_sorted_view();
311
+ return sorted_view_->get_CDF(split_points, size, inclusive);
312
+ }
313
+
314
+ template<typename T, typename C, typename A>
315
+ auto kll_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
316
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
279
317
  if ((rank < 0.0) || (rank > 1.0)) {
280
- throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
318
+ throw std::invalid_argument("normalized rank cannot be less than zero or greater than 1.0");
281
319
  }
282
320
  // may have a side effect of sorting level zero if needed
283
- return get_sorted_view<inclusive>(true).get_quantile(rank);
321
+ setup_sorted_view();
322
+ return sorted_view_->get_quantile(rank, inclusive);
284
323
  }
285
324
 
286
- template<typename T, typename C, typename S, typename A>
287
- template<bool inclusive>
288
- std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* ranks, uint32_t size) const {
325
+ template<typename T, typename C, typename A>
326
+ std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
327
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
289
328
  std::vector<T, A> quantiles(allocator_);
290
- if (is_empty()) return quantiles;
291
329
  quantiles.reserve(size);
292
330
 
293
331
  // may have a side effect of sorting level zero if needed
294
- auto view = get_sorted_view<inclusive>(true);
332
+ setup_sorted_view();
295
333
 
296
334
  for (uint32_t i = 0; i < size; i++) {
297
335
  const double rank = ranks[i];
298
336
  if ((rank < 0.0) || (rank > 1.0)) {
299
- throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
300
- }
301
- else if (rank == 0.0) quantiles.push_back(*min_value_);
302
- else if (rank == 1.0) quantiles.push_back(*max_value_);
303
- else {
304
- quantiles.push_back(view.get_quantile(rank));
337
+ throw std::invalid_argument("normalized rank cannot be less than 0 or greater than 1");
305
338
  }
339
+ quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
306
340
  }
307
341
  return quantiles;
308
342
  }
309
343
 
310
- template<typename T, typename C, typename S, typename A>
311
- template<bool inclusive>
312
- std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(uint32_t num) const {
313
- if (is_empty()) return std::vector<T, A>(allocator_);
344
+ template<typename T, typename C, typename A>
345
+ std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
346
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
314
347
  if (num == 0) {
315
348
  throw std::invalid_argument("num must be > 0");
316
349
  }
317
- vector_d<A> fractions(num, 0, allocator_);
318
- fractions[0] = 0.0;
350
+ vector_double ranks(num, 0, allocator_);
351
+ ranks[0] = 0.0;
319
352
  for (size_t i = 1; i < num; i++) {
320
- fractions[i] = static_cast<double>(i) / (num - 1);
353
+ ranks[i] = static_cast<double>(i) / (num - 1);
321
354
  }
322
355
  if (num > 1) {
323
- fractions[num - 1] = 1.0;
324
- }
325
- return get_quantiles<inclusive>(fractions.data(), num);
326
- }
327
-
328
- template<typename T, typename C, typename S, typename A>
329
- template<bool inclusive>
330
- double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
331
- if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
332
- uint8_t level = 0;
333
- uint64_t weight = 1;
334
- uint64_t total = 0;
335
- while (level < num_levels_) {
336
- const auto from_index = levels_[level];
337
- const auto to_index = levels_[level + 1]; // exclusive
338
- for (uint32_t i = from_index; i < to_index; i++) {
339
- if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
340
- total += weight;
341
- } else if ((level > 0) || is_level_zero_sorted_) {
342
- break; // levels above 0 are sorted, no point comparing further
343
- }
344
- }
345
- level++;
346
- weight *= 2;
356
+ ranks[num - 1] = 1.0;
347
357
  }
348
- return (double) total / n_;
349
- }
350
-
351
- template<typename T, typename C, typename S, typename A>
352
- template<bool inclusive>
353
- vector_d<A> kll_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const {
354
- return get_PMF_or_CDF<inclusive>(split_points, size, false);
355
- }
356
-
357
- template<typename T, typename C, typename S, typename A>
358
- template<bool inclusive>
359
- vector_d<A> kll_sketch<T, C, S, A>::get_CDF(const T* split_points, uint32_t size) const {
360
- return get_PMF_or_CDF<inclusive>(split_points, size, true);
358
+ return get_quantiles(ranks.data(), num, inclusive);
361
359
  }
362
360
 
363
- template<typename T, typename C, typename S, typename A>
364
- double kll_sketch<T, C, S, A>::get_normalized_rank_error(bool pmf) const {
361
+ template<typename T, typename C, typename A>
362
+ double kll_sketch<T, C, A>::get_normalized_rank_error(bool pmf) const {
365
363
  return get_normalized_rank_error(min_k_, pmf);
366
364
  }
367
365
 
368
366
  // implementation for fixed-size arithmetic types (integral and floating point)
369
- template<typename T, typename C, typename S, typename A>
367
+ template<typename T, typename C, typename A>
370
368
  template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
371
- size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe&) const {
369
+ size_t kll_sketch<T, C, A>::get_serialized_size_bytes(const SerDe&) const {
372
370
  if (is_empty()) { return EMPTY_SIZE_BYTES; }
373
371
  if (num_levels_ == 1 && get_num_retained() == 1) {
374
372
  return DATA_START_SINGLE_ITEM + sizeof(TT);
@@ -378,25 +376,25 @@ size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe&) const {
378
376
  }
379
377
 
380
378
  // implementation for all other types
381
- template<typename T, typename C, typename S, typename A>
379
+ template<typename T, typename C, typename A>
382
380
  template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
383
- size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
381
+ size_t kll_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
384
382
  if (is_empty()) { return EMPTY_SIZE_BYTES; }
385
383
  if (num_levels_ == 1 && get_num_retained() == 1) {
386
384
  return DATA_START_SINGLE_ITEM + sd.size_of_item(items_[levels_[0]]);
387
385
  }
388
386
  // the last integer in the levels_ array is not serialized because it can be derived
389
387
  size_t size = DATA_START + num_levels_ * sizeof(uint32_t);
390
- size += sd.size_of_item(*min_value_);
391
- size += sd.size_of_item(*max_value_);
388
+ size += sd.size_of_item(*min_item_);
389
+ size += sd.size_of_item(*max_item_);
392
390
  for (auto it: *this) size += sd.size_of_item(it.first);
393
391
  return size;
394
392
  }
395
393
 
396
394
  // implementation for fixed-size arithmetic types (integral and floating point)
397
- template<typename T, typename C, typename S, typename A>
395
+ template<typename T, typename C, typename A>
398
396
  template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
399
- size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n) {
397
+ size_t kll_sketch<T, C, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n) {
400
398
  const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
401
399
  const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
402
400
  // the last integer in the levels_ array is not serialized because it can be derived
@@ -404,18 +402,18 @@ size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_
404
402
  }
405
403
 
406
404
  // implementation for all other types
407
- template<typename T, typename C, typename S, typename A>
405
+ template<typename T, typename C, typename A>
408
406
  template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
409
- size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes) {
407
+ size_t kll_sketch<T, C, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes) {
410
408
  const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
411
409
  const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
412
410
  // the last integer in the levels_ array is not serialized because it can be derived
413
411
  return DATA_START + num_levels * sizeof(uint32_t) + (max_num_retained + 2) * max_item_size_bytes;
414
412
  }
415
413
 
416
- template<typename T, typename C, typename S, typename A>
414
+ template<typename T, typename C, typename A>
417
415
  template<typename SerDe>
418
- void kll_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
416
+ void kll_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
419
417
  const bool is_single_item = n_ == 1;
420
418
  const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
421
419
  write(os, preamble_ints);
@@ -440,18 +438,18 @@ void kll_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
440
438
  write(os, num_levels_);
441
439
  write(os, unused);
442
440
  write(os, levels_.data(), sizeof(levels_[0]) * num_levels_);
443
- sd.serialize(os, min_value_, 1);
444
- sd.serialize(os, max_value_, 1);
441
+ sd.serialize(os, min_item_, 1);
442
+ sd.serialize(os, max_item_, 1);
445
443
  }
446
444
  sd.serialize(os, &items_[levels_[0]], get_num_retained());
447
445
  }
448
446
 
449
- template<typename T, typename C, typename S, typename A>
447
+ template<typename T, typename C, typename A>
450
448
  template<typename SerDe>
451
- vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
449
+ auto kll_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
452
450
  const bool is_single_item = n_ == 1;
453
451
  const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
454
- vector_u8<A> bytes(size, 0, allocator_);
452
+ vector_bytes bytes(size, 0, allocator_);
455
453
  uint8_t* ptr = bytes.data() + header_size_bytes;
456
454
  const uint8_t* end_ptr = ptr + size;
457
455
  const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
@@ -476,25 +474,22 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const
476
474
  ptr += copy_to_mem(num_levels_, ptr);
477
475
  ptr += sizeof(uint8_t); // unused
478
476
  ptr += copy_to_mem(levels_.data(), ptr, sizeof(levels_[0]) * num_levels_);
479
- ptr += sd.serialize(ptr, end_ptr - ptr, min_value_, 1);
480
- ptr += sd.serialize(ptr, end_ptr - ptr, max_value_, 1);
477
+ ptr += sd.serialize(ptr, end_ptr - ptr, min_item_, 1);
478
+ ptr += sd.serialize(ptr, end_ptr - ptr, max_item_, 1);
481
479
  }
482
480
  const size_t bytes_remaining = end_ptr - ptr;
483
481
  ptr += sd.serialize(ptr, bytes_remaining, &items_[levels_[0]], get_num_retained());
484
482
  }
485
483
  const size_t delta = ptr - bytes.data();
486
- if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
484
+ if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta)
485
+ + " != " + std::to_string(size));
487
486
  return bytes;
488
487
  }
489
488
 
490
- template<typename T, typename C, typename S, typename A>
491
- kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
492
- return deserialize(is, S(), allocator);
493
- }
494
-
495
- template<typename T, typename C, typename S, typename A>
489
+ template<typename T, typename C, typename A>
496
490
  template<typename SerDe>
497
- kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
491
+ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(std::istream& is, const SerDe& sd,
492
+ const C& comparator, const A& allocator) {
498
493
  const auto preamble_ints = read<uint8_t>(is);
499
494
  const auto serial_version = read<uint8_t>(is);
500
495
  const auto family_id = read<uint8_t>(is);
@@ -510,7 +505,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
510
505
 
511
506
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
512
507
  const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
513
- if (is_empty) return kll_sketch(k, allocator);
508
+ if (is_empty) return kll_sketch(k, comparator, allocator);
514
509
 
515
510
  uint64_t n;
516
511
  uint16_t min_k;
@@ -526,7 +521,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
526
521
  num_levels = read<uint8_t>(is);
527
522
  read<uint8_t>(is); // skip unused byte
528
523
  }
529
- vector_u32<A> levels(num_levels + 1, 0, allocator);
524
+ vector_u32 levels(num_levels + 1, 0, allocator);
530
525
  const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
531
526
  if (is_single_item) {
532
527
  levels[0] = capacity - 1;
@@ -537,17 +532,17 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
537
532
  levels[num_levels] = capacity;
538
533
  A alloc(allocator);
539
534
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
540
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
541
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
542
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
543
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
535
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
536
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
537
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
538
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
544
539
  if (!is_single_item) {
545
- sd.deserialize(is, min_value_buffer.get(), 1);
540
+ sd.deserialize(is, min_item_buffer.get(), 1);
546
541
  // serde call did not throw, repackage with destrtuctor
547
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
548
- sd.deserialize(is, max_value_buffer.get(), 1);
542
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
543
+ sd.deserialize(is, max_item_buffer.get(), 1);
549
544
  // serde call did not throw, repackage with destrtuctor
550
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
545
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
551
546
  }
552
547
  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
553
548
  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
@@ -557,27 +552,23 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
557
552
  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
558
553
  const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
559
554
  if (is_single_item) {
560
- new (min_value_buffer.get()) T(items.get()[levels[0]]);
555
+ new (min_item_buffer.get()) T(items.get()[levels[0]]);
561
556
  // copy did not throw, repackage with destrtuctor
562
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
563
- new (max_value_buffer.get()) T(items.get()[levels[0]]);
557
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
558
+ new (max_item_buffer.get()) T(items.get()[levels[0]]);
564
559
  // copy did not throw, repackage with destrtuctor
565
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
560
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
566
561
  }
567
562
  if (!is.good())
568
563
  throw std::runtime_error("error reading from std::istream");
569
564
  return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
570
- std::move(min_value), std::move(max_value), is_level_zero_sorted);
571
- }
572
-
573
- template<typename T, typename C, typename S, typename A>
574
- kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
575
- return deserialize(bytes, size, S(), allocator);
565
+ std::move(min_item), std::move(max_item), is_level_zero_sorted, comparator);
576
566
  }
577
567
 
578
- template<typename T, typename C, typename S, typename A>
568
+ template<typename T, typename C, typename A>
579
569
  template<typename SerDe>
580
- kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
570
+ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& sd,
571
+ const C& comparator, const A& allocator) {
581
572
  ensure_minimum_memory(size, 8);
582
573
  const char* ptr = static_cast<const char*>(bytes);
583
574
  uint8_t preamble_ints;
@@ -601,7 +592,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
601
592
  ensure_minimum_memory(size, preamble_ints * sizeof(uint32_t));
602
593
 
603
594
  const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
604
- if (is_empty) return kll_sketch<T, C, S, A>(k, allocator);
595
+ if (is_empty) return kll_sketch(k, comparator, allocator);
605
596
 
606
597
  uint64_t n;
607
598
  uint16_t min_k;
@@ -618,7 +609,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
618
609
  ptr += copy_from_mem(ptr, num_levels);
619
610
  ptr += sizeof(uint8_t); // skip unused byte
620
611
  }
621
- vector_u32<A> levels(num_levels + 1, 0, allocator);
612
+ vector_u32 levels(num_levels + 1, 0, allocator);
622
613
  const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
623
614
  if (is_single_item) {
624
615
  levels[0] = capacity - 1;
@@ -629,17 +620,17 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
629
620
  levels[num_levels] = capacity;
630
621
  A alloc(allocator);
631
622
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
632
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
633
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
634
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
635
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
623
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
624
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
625
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
626
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
636
627
  if (!is_single_item) {
637
- ptr += sd.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
628
+ ptr += sd.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
638
629
  // serde call did not throw, repackage with destrtuctor
639
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
640
- ptr += sd.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
630
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
631
+ ptr += sd.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
641
632
  // serde call did not throw, repackage with destrtuctor
642
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
633
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
643
634
  }
644
635
  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
645
636
  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
@@ -651,15 +642,15 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
651
642
  if (delta != size) throw std::logic_error("deserialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
652
643
  const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
653
644
  if (is_single_item) {
654
- new (min_value_buffer.get()) T(items.get()[levels[0]]);
645
+ new (min_item_buffer.get()) T(items.get()[levels[0]]);
655
646
  // copy did not throw, repackage with destrtuctor
656
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
657
- new (max_value_buffer.get()) T(items.get()[levels[0]]);
647
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
648
+ new (max_item_buffer.get()) T(items.get()[levels[0]]);
658
649
  // copy did not throw, repackage with destrtuctor
659
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
650
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
660
651
  }
661
652
  return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
662
- std::move(min_value), std::move(max_value), is_level_zero_sorted);
653
+ std::move(min_item), std::move(max_item), is_level_zero_sorted, comparator);
663
654
  }
664
655
 
665
656
  /*
@@ -669,36 +660,38 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
669
660
  * Otherwise, it is the "single-sided" normalized rank error for all the other queries.
670
661
  * Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials
671
662
  */
672
- template<typename T, typename C, typename S, typename A>
673
- double kll_sketch<T, C, S, A>::get_normalized_rank_error(uint16_t k, bool pmf) {
663
+ template<typename T, typename C, typename A>
664
+ double kll_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool pmf) {
674
665
  return pmf
675
666
  ? 2.446 / pow(k, 0.9433)
676
667
  : 2.296 / pow(k, 0.9723);
677
668
  }
678
669
 
679
670
  // for deserialization
680
- template<typename T, typename C, typename S, typename A>
681
- kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32<A>&& levels,
682
- std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_value,
683
- std::unique_ptr<T, item_deleter> max_value, bool is_level_zero_sorted):
671
+ template<typename T, typename C, typename A>
672
+ kll_sketch<T, C, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32&& levels,
673
+ std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_item,
674
+ std::unique_ptr<T, item_deleter> max_item, bool is_level_zero_sorted, const C& comparator):
675
+ comparator_(comparator),
684
676
  allocator_(levels.get_allocator()),
685
677
  k_(k),
686
678
  m_(DEFAULT_M),
687
679
  min_k_(min_k),
688
- n_(n),
689
680
  num_levels_(num_levels),
681
+ is_level_zero_sorted_(is_level_zero_sorted),
682
+ n_(n),
690
683
  levels_(std::move(levels)),
691
684
  items_(items.release()),
692
685
  items_size_(items_size),
693
- min_value_(min_value.release()),
694
- max_value_(max_value.release()),
695
- is_level_zero_sorted_(is_level_zero_sorted)
686
+ min_item_(min_item.release()),
687
+ max_item_(max_item.release()),
688
+ sorted_view_(nullptr)
696
689
  {}
697
690
 
698
691
  // The following code is only valid in the special case of exactly reaching capacity while updating.
699
692
  // It cannot be used while merging, while reducing k, or anything else.
700
- template<typename T, typename C, typename S, typename A>
701
- void kll_sketch<T, C, S, A>::compress_while_updating(void) {
693
+ template<typename T, typename C, typename A>
694
+ void kll_sketch<T, C, A>::compress_while_updating(void) {
702
695
  const uint8_t level = find_level_to_compact();
703
696
 
704
697
  // It is important to add the new top level right here. Be aware that this operation
@@ -722,7 +715,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
722
715
  // level zero might not be sorted, so we must sort it if we wish to compact it
723
716
  // sort_level_zero() is not used here because of the adjustment for odd number of items
724
717
  if ((level == 0) && !is_level_zero_sorted_) {
725
- std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, C());
718
+ std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, comparator_);
726
719
  }
727
720
  if (pop_above == 0) {
728
721
  kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
@@ -751,8 +744,8 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
751
744
  for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
752
745
  }
753
746
 
754
- template<typename T, typename C, typename S, typename A>
755
- uint8_t kll_sketch<T, C, S, A>::find_level_to_compact() const {
747
+ template<typename T, typename C, typename A>
748
+ uint8_t kll_sketch<T, C, A>::find_level_to_compact() const {
756
749
  uint8_t level = 0;
757
750
  while (true) {
758
751
  if (level >= num_levels_) throw std::logic_error("capacity calculation error");
@@ -765,8 +758,8 @@ uint8_t kll_sketch<T, C, S, A>::find_level_to_compact() const {
765
758
  }
766
759
  }
767
760
 
768
- template<typename T, typename C, typename S, typename A>
769
- void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
761
+ template<typename T, typename C, typename A>
762
+ void kll_sketch<T, C, A>::add_empty_top_level_to_completely_full_sketch() {
770
763
  const uint32_t cur_total_cap = levels_[num_levels_];
771
764
 
772
765
  // make sure that we are following a certain growth scheme
@@ -800,124 +793,50 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
800
793
  levels_[num_levels_] = new_total_cap; // initialize the new "extra" index at the top
801
794
  }
802
795
 
803
- template<typename T, typename C, typename S, typename A>
804
- void kll_sketch<T, C, S, A>::sort_level_zero() {
796
+ template<typename T, typename C, typename A>
797
+ void kll_sketch<T, C, A>::sort_level_zero() {
805
798
  if (!is_level_zero_sorted_) {
806
- std::sort(items_ + levels_[0], items_ + levels_[1], C());
799
+ std::sort(items_ + levels_[0], items_ + levels_[1], comparator_);
807
800
  is_level_zero_sorted_ = true;
808
801
  }
809
802
  }
810
803
 
811
- template<typename T, typename C, typename S, typename A>
812
- void kll_sketch<T, C, S, A>::check_sorting() const {
804
+ template<typename T, typename C, typename A>
805
+ void kll_sketch<T, C, A>::check_sorting() const {
813
806
  // not checking level 0
814
807
  for (uint8_t level = 1; level < num_levels_; ++level) {
815
808
  const auto from = items_ + levels_[level];
816
809
  const auto to = items_ + levels_[level + 1];
817
- if (!std::is_sorted(from, to, C())) {
810
+ if (!std::is_sorted(from, to, comparator_)) {
818
811
  throw std::logic_error("levels must be sorted");
819
812
  }
820
813
  }
821
814
  }
822
815
 
823
- template<typename T, typename C, typename S, typename A>
824
- template<bool inclusive>
825
- quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
816
+ template<typename T, typename C, typename A>
817
+ quantiles_sorted_view<T, C, A> kll_sketch<T, C, A>::get_sorted_view() const {
826
818
  const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
827
- quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
819
+ quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
828
820
  for (uint8_t level = 0; level < num_levels_; ++level) {
829
821
  const auto from = items_ + levels_[level];
830
822
  const auto to = items_ + levels_[level + 1]; // exclusive
831
823
  view.add(from, to, 1 << level);
832
824
  }
833
- if (cumulative) view.template convert_to_cummulative<inclusive>();
825
+ view.convert_to_cummulative();
834
826
  return view;
835
827
  }
836
828
 
837
- template<typename T, typename C, typename S, typename A>
838
- template<bool inclusive>
839
- vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const {
840
- if (is_empty()) return vector_d<A>(allocator_);
841
- kll_helper::validate_values<T, C>(split_points, size);
842
- vector_d<A> buckets(size + 1, 0, allocator_);
843
- uint8_t level = 0;
844
- uint64_t weight = 1;
845
- while (level < num_levels_) {
846
- const auto from_index = levels_[level];
847
- const auto to_index = levels_[level + 1]; // exclusive
848
- if ((level == 0) && !is_level_zero_sorted_) {
849
- increment_buckets_unsorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
850
- } else {
851
- increment_buckets_sorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
852
- }
853
- level++;
854
- weight *= 2;
855
- }
856
- // normalize and, if CDF, convert to cumulative
857
- if (is_CDF) {
858
- double subtotal = 0;
859
- for (uint32_t i = 0; i <= size; i++) {
860
- subtotal += buckets[i];
861
- buckets[i] = subtotal / n_;
862
- }
863
- } else {
864
- for (uint32_t i = 0; i <= size; i++) {
865
- buckets[i] /= n_;
866
- }
867
- }
868
- return buckets;
869
- }
870
-
871
- template<typename T, typename C, typename S, typename A>
872
- template<bool inclusive>
873
- void kll_sketch<T, C, S, A>::increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
874
- const T* split_points, uint32_t size, double* buckets) const
875
- {
876
- for (uint32_t i = from_index; i < to_index; i++) {
877
- uint32_t j;
878
- for (j = 0; j < size; j++) {
879
- if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
880
- break;
881
- }
882
- }
883
- buckets[j] += weight;
884
- }
885
- }
886
-
887
- template<typename T, typename C, typename S, typename A>
888
- template<bool inclusive>
889
- void kll_sketch<T, C, S, A>::increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
890
- const T* split_points, uint32_t size, double* buckets) const
891
- {
892
- uint32_t i = from_index;
893
- uint32_t j = 0;
894
- while ((i < to_index) && (j < size)) {
895
- if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
896
- buckets[j] += weight; // this sample goes into this bucket
897
- i++; // move on to next sample and see whether it also goes into this bucket
898
- } else {
899
- j++; // no more samples for this bucket
900
- }
901
- }
902
- // now either i == to_index (we are out of samples), or
903
- // j == size (we are out of buckets, but there are more samples remaining)
904
- // we only need to do something in the latter case
905
- if (j == size) {
906
- buckets[j] += weight * (to_index - i);
907
- }
908
- }
909
-
910
- template<typename T, typename C, typename S, typename A>
829
+ template<typename T, typename C, typename A>
911
830
  template<typename O>
912
- void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
831
+ void kll_sketch<T, C, A>::merge_higher_levels(O&& other, uint64_t final_n) {
913
832
  const uint32_t tmp_num_items = get_num_retained() + other.get_num_retained_above_level_zero();
914
833
  A alloc(allocator_);
915
834
  auto tmp_items_deleter = [tmp_num_items, &alloc](T* ptr) { alloc.deallocate(ptr, tmp_num_items); }; // no destructor needed
916
835
  const std::unique_ptr<T, decltype(tmp_items_deleter)> workbuf(allocator_.allocate(tmp_num_items), tmp_items_deleter);
917
836
  const uint8_t ub = kll_helper::ub_on_num_levels(final_n);
918
837
  const size_t work_levels_size = ub + 2; // ub+1 does not work
919
- vector_u32<A> worklevels(work_levels_size, 0, allocator_);
920
- vector_u32<A> outlevels(work_levels_size, 0, allocator_);
838
+ vector_u32 worklevels(work_levels_size, 0, allocator_);
839
+ vector_u32 outlevels(work_levels_size, 0, allocator_);
921
840
 
922
841
  const uint8_t provisional_num_levels = std::max(num_levels_, other.num_levels_);
923
842
 
@@ -950,9 +869,9 @@ void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
950
869
  }
951
870
 
952
871
  // this leaves items_ uninitialized (all objects moved out and destroyed)
953
- template<typename T, typename C, typename S, typename A>
872
+ template<typename T, typename C, typename A>
954
873
  template<typename FwdSk>
955
- void kll_sketch<T, C, S, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
874
+ void kll_sketch<T, C, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
956
875
  worklevels[0] = 0;
957
876
 
958
877
  // the level zero data from "other" was already inserted into "this"
@@ -976,36 +895,36 @@ void kll_sketch<T, C, S, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uin
976
895
  }
977
896
  }
978
897
 
979
- template<typename T, typename C, typename S, typename A>
980
- void kll_sketch<T, C, S, A>::assert_correct_total_weight() const {
898
+ template<typename T, typename C, typename A>
899
+ void kll_sketch<T, C, A>::assert_correct_total_weight() const {
981
900
  const uint64_t total(kll_helper::sum_the_sample_weights(num_levels_, levels_.data()));
982
901
  if (total != n_) {
983
902
  throw std::logic_error("Total weight does not match N");
984
903
  }
985
904
  }
986
905
 
987
- template<typename T, typename C, typename S, typename A>
988
- uint32_t kll_sketch<T, C, S, A>::safe_level_size(uint8_t level) const {
906
+ template<typename T, typename C, typename A>
907
+ uint32_t kll_sketch<T, C, A>::safe_level_size(uint8_t level) const {
989
908
  if (level >= num_levels_) return 0;
990
909
  return levels_[level + 1] - levels_[level];
991
910
  }
992
911
 
993
- template<typename T, typename C, typename S, typename A>
994
- uint32_t kll_sketch<T, C, S, A>::get_num_retained_above_level_zero() const {
912
+ template<typename T, typename C, typename A>
913
+ uint32_t kll_sketch<T, C, A>::get_num_retained_above_level_zero() const {
995
914
  if (num_levels_ == 1) return 0;
996
915
  return levels_[num_levels_] - levels_[1];
997
916
  }
998
917
 
999
- template<typename T, typename C, typename S, typename A>
1000
- void kll_sketch<T, C, S, A>::check_m(uint8_t m) {
918
+ template<typename T, typename C, typename A>
919
+ void kll_sketch<T, C, A>::check_m(uint8_t m) {
1001
920
  if (m != DEFAULT_M) {
1002
921
  throw std::invalid_argument("Possible corruption: M must be " + std::to_string(DEFAULT_M)
1003
922
  + ": " + std::to_string(m));
1004
923
  }
1005
924
  }
1006
925
 
1007
- template<typename T, typename C, typename S, typename A>
1008
- void kll_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t flags_byte) {
926
+ template<typename T, typename C, typename A>
927
+ void kll_sketch<T, C, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t flags_byte) {
1009
928
  const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
1010
929
  const bool is_single_item(flags_byte & (1 << flags::IS_SINGLE_ITEM));
1011
930
  if (is_empty || is_single_item) {
@@ -1021,8 +940,8 @@ void kll_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t
1021
940
  }
1022
941
  }
1023
942
 
1024
- template<typename T, typename C, typename S, typename A>
1025
- void kll_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
943
+ template<typename T, typename C, typename A>
944
+ void kll_sketch<T, C, A>::check_serial_version(uint8_t serial_version) {
1026
945
  if (serial_version != SERIAL_VERSION_1 && serial_version != SERIAL_VERSION_2) {
1027
946
  throw std::invalid_argument("Possible corruption: serial version mismatch: expected "
1028
947
  + std::to_string(SERIAL_VERSION_1) + " or " + std::to_string(SERIAL_VERSION_2)
@@ -1030,16 +949,16 @@ void kll_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
1030
949
  }
1031
950
  }
1032
951
 
1033
- template<typename T, typename C, typename S, typename A>
1034
- void kll_sketch<T, C, S, A>::check_family_id(uint8_t family_id) {
952
+ template<typename T, typename C, typename A>
953
+ void kll_sketch<T, C, A>::check_family_id(uint8_t family_id) {
1035
954
  if (family_id != FAMILY) {
1036
955
  throw std::invalid_argument("Possible corruption: family mismatch: expected "
1037
956
  + std::to_string(FAMILY) + ", got " + std::to_string(family_id));
1038
957
  }
1039
958
  }
1040
959
 
1041
- template <typename T, typename C, typename S, typename A>
1042
- string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
960
+ template <typename T, typename C, typename A>
961
+ string<A> kll_sketch<T, C, A>::to_string(bool print_levels, bool print_items) const {
1043
962
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
1044
963
  // The stream does not support passing an allocator instance, and alternatives are complicated.
1045
964
  std::ostringstream os;
@@ -1057,8 +976,8 @@ string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
1057
976
  os << " Capacity items : " << items_size_ << std::endl;
1058
977
  os << " Retained items : " << get_num_retained() << std::endl;
1059
978
  if (!is_empty()) {
1060
- os << " Min value : " << *min_value_ << std::endl;
1061
- os << " Max value : " << *max_value_ << std::endl;
979
+ os << " Min item : " << *min_item_ << std::endl;
980
+ os << " Max item : " << *max_item_ << std::endl;
1062
981
  }
1063
982
  os << "### End sketch summary" << std::endl;
1064
983
 
@@ -1090,25 +1009,74 @@ string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
1090
1009
  return string<A>(os.str().c_str(), allocator_);
1091
1010
  }
1092
1011
 
1093
- template <typename T, typename C, typename S, typename A>
1094
- typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::begin() const {
1095
- return kll_sketch<T, C, S, A>::const_iterator(items_, levels_.data(), num_levels_);
1012
+ template <typename T, typename C, typename A>
1013
+ typename kll_sketch<T, C, A>::const_iterator kll_sketch<T, C, A>::begin() const {
1014
+ return kll_sketch<T, C, A>::const_iterator(items_, levels_.data(), num_levels_);
1015
+ }
1016
+
1017
+ template <typename T, typename C, typename A>
1018
+ typename kll_sketch<T, C, A>::const_iterator kll_sketch<T, C, A>::end() const {
1019
+ return kll_sketch<T, C, A>::const_iterator(nullptr, levels_.data(), num_levels_);
1020
+ }
1021
+
1022
+ template<typename T, typename C, typename A>
1023
+ class kll_sketch<T, C, A>::item_deleter {
1024
+ public:
1025
+ item_deleter(const A& allocator): allocator_(allocator) {}
1026
+ void operator() (T* ptr) {
1027
+ if (ptr != nullptr) {
1028
+ ptr->~T();
1029
+ allocator_.deallocate(ptr, 1);
1030
+ }
1031
+ }
1032
+ private:
1033
+ A allocator_;
1034
+ };
1035
+
1036
+ template<typename T, typename C, typename A>
1037
+ class kll_sketch<T, C, A>::items_deleter {
1038
+ public:
1039
+ items_deleter(uint32_t start, uint32_t num, const A& allocator):
1040
+ allocator_(allocator), start_(start), num_(num) {}
1041
+ void operator() (T* ptr) {
1042
+ if (ptr != nullptr) {
1043
+ for (uint32_t i = start_; i < num_; ++i) ptr[i].~T();
1044
+ allocator_.deallocate(ptr, num_);
1045
+ }
1046
+ }
1047
+ private:
1048
+ A allocator_;
1049
+ uint32_t start_;
1050
+ uint32_t num_;
1051
+ };
1052
+
1053
+ template<typename T, typename C, typename A>
1054
+ void kll_sketch<T, C, A>::setup_sorted_view() const {
1055
+ if (sorted_view_ == nullptr) {
1056
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
1057
+ sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
1058
+ }
1096
1059
  }
1097
1060
 
1098
- template <typename T, typename C, typename S, typename A>
1099
- typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::end() const {
1100
- return kll_sketch<T, C, S, A>::const_iterator(nullptr, levels_.data(), num_levels_);
1061
+ template<typename T, typename C, typename A>
1062
+ void kll_sketch<T, C, A>::reset_sorted_view() {
1063
+ if (sorted_view_ != nullptr) {
1064
+ sorted_view_->~quantiles_sorted_view();
1065
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
1066
+ AllocSortedView(allocator_).deallocate(sorted_view_, 1);
1067
+ sorted_view_ = nullptr;
1068
+ }
1101
1069
  }
1102
1070
 
1103
1071
  // kll_sketch::const_iterator implementation
1104
1072
 
1105
- template<typename T, typename C, typename S, typename A>
1106
- kll_sketch<T, C, S, A>::const_iterator::const_iterator(const T* items, const uint32_t* levels, const uint8_t num_levels):
1073
+ template<typename T, typename C, typename A>
1074
+ kll_sketch<T, C, A>::const_iterator::const_iterator(const T* items, const uint32_t* levels, const uint8_t num_levels):
1107
1075
  items(items), levels(levels), num_levels(num_levels), index(items == nullptr ? levels[num_levels] : levels[0]), level(items == nullptr ? num_levels : 0), weight(1)
1108
1076
  {}
1109
1077
 
1110
- template<typename T, typename C, typename S, typename A>
1111
- typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_iterator::operator++() {
1078
+ template<typename T, typename C, typename A>
1079
+ typename kll_sketch<T, C, A>::const_iterator& kll_sketch<T, C, A>::const_iterator::operator++() {
1112
1080
  ++index;
1113
1081
  if (index == levels[level + 1]) { // go to the next non-empty level
1114
1082
  do {
@@ -1119,58 +1087,32 @@ typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_i
1119
1087
  return *this;
1120
1088
  }
1121
1089
 
1122
- template<typename T, typename C, typename S, typename A>
1123
- typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_iterator::operator++(int) {
1090
+ template<typename T, typename C, typename A>
1091
+ typename kll_sketch<T, C, A>::const_iterator& kll_sketch<T, C, A>::const_iterator::operator++(int) {
1124
1092
  const_iterator tmp(*this);
1125
1093
  operator++();
1126
1094
  return tmp;
1127
1095
  }
1128
1096
 
1129
- template<typename T, typename C, typename S, typename A>
1130
- bool kll_sketch<T, C, S, A>::const_iterator::operator==(const const_iterator& other) const {
1097
+ template<typename T, typename C, typename A>
1098
+ bool kll_sketch<T, C, A>::const_iterator::operator==(const const_iterator& other) const {
1131
1099
  return index == other.index;
1132
1100
  }
1133
1101
 
1134
- template<typename T, typename C, typename S, typename A>
1135
- bool kll_sketch<T, C, S, A>::const_iterator::operator!=(const const_iterator& other) const {
1102
+ template<typename T, typename C, typename A>
1103
+ bool kll_sketch<T, C, A>::const_iterator::operator!=(const const_iterator& other) const {
1136
1104
  return !operator==(other);
1137
1105
  }
1138
1106
 
1139
- template<typename T, typename C, typename S, typename A>
1140
- const std::pair<const T&, const uint64_t> kll_sketch<T, C, S, A>::const_iterator::operator*() const {
1141
- return std::pair<const T&, const uint64_t>(items[index], weight);
1107
+ template<typename T, typename C, typename A>
1108
+ auto kll_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
1109
+ return value_type(items[index], weight);
1142
1110
  }
1143
1111
 
1144
- template<typename T, typename C, typename S, typename A>
1145
- class kll_sketch<T, C, S, A>::item_deleter {
1146
- public:
1147
- item_deleter(const A& allocator): allocator_(allocator) {}
1148
- void operator() (T* ptr) {
1149
- if (ptr != nullptr) {
1150
- ptr->~T();
1151
- allocator_.deallocate(ptr, 1);
1152
- }
1153
- }
1154
- private:
1155
- A allocator_;
1156
- };
1157
-
1158
- template<typename T, typename C, typename S, typename A>
1159
- class kll_sketch<T, C, S, A>::items_deleter {
1160
- public:
1161
- items_deleter(uint32_t start, uint32_t num, const A& allocator):
1162
- allocator_(allocator), start_(start), num_(num) {}
1163
- void operator() (T* ptr) {
1164
- if (ptr != nullptr) {
1165
- for (uint32_t i = start_; i < num_; ++i) ptr[i].~T();
1166
- allocator_.deallocate(ptr, num_);
1167
- }
1168
- }
1169
- private:
1170
- A allocator_;
1171
- uint32_t start_;
1172
- uint32_t num_;
1173
- };
1112
+ template<typename T, typename C, typename A>
1113
+ auto kll_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
1114
+ return **this;
1115
+ }
1174
1116
 
1175
1117
  } /* namespace datasketches */
1176
1118