datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -32,20 +32,22 @@
32
32
 
33
33
  namespace datasketches {
34
34
 
35
- template<typename T, typename C, typename S, typename A>
36
- kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, const A& allocator):
35
+ template<typename T, typename C, typename A>
36
+ kll_sketch<T, C, A>::kll_sketch(uint16_t k, const C& comparator, const A& allocator):
37
+ comparator_(comparator),
37
38
  allocator_(allocator),
38
39
  k_(k),
39
40
  m_(DEFAULT_M),
40
41
  min_k_(k),
41
- n_(0),
42
42
  num_levels_(1),
43
+ is_level_zero_sorted_(false),
44
+ n_(0),
43
45
  levels_(2, 0, allocator),
44
46
  items_(nullptr),
45
47
  items_size_(k_),
46
- min_value_(nullptr),
47
- max_value_(nullptr),
48
- is_level_zero_sorted_(false)
48
+ min_item_(nullptr),
49
+ max_item_(nullptr),
50
+ sorted_view_(nullptr)
49
51
  {
50
52
  if (k < MIN_K || k > MAX_K) {
51
53
  throw std::invalid_argument("K must be >= " + std::to_string(MIN_K) + " and <= " + std::to_string(MAX_K) + ": " + std::to_string(k));
@@ -54,115 +56,126 @@ is_level_zero_sorted_(false)
54
56
  items_ = allocator_.allocate(items_size_);
55
57
  }
56
58
 
57
- template<typename T, typename C, typename S, typename A>
58
- kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch& other):
59
+ template<typename T, typename C, typename A>
60
+ kll_sketch<T, C, A>::kll_sketch(const kll_sketch& other):
61
+ comparator_(other.comparator_),
59
62
  allocator_(other.allocator_),
60
63
  k_(other.k_),
61
64
  m_(other.m_),
62
65
  min_k_(other.min_k_),
63
- n_(other.n_),
64
66
  num_levels_(other.num_levels_),
67
+ is_level_zero_sorted_(other.is_level_zero_sorted_),
68
+ n_(other.n_),
65
69
  levels_(other.levels_),
66
70
  items_(nullptr),
67
71
  items_size_(other.items_size_),
68
- min_value_(nullptr),
69
- max_value_(nullptr),
70
- is_level_zero_sorted_(other.is_level_zero_sorted_)
72
+ min_item_(nullptr),
73
+ max_item_(nullptr),
74
+ sorted_view_(nullptr)
71
75
  {
72
76
  items_ = allocator_.allocate(items_size_);
73
77
  for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
74
- if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
75
- if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
78
+ if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
79
+ if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
76
80
  }
77
81
 
78
- template<typename T, typename C, typename S, typename A>
79
- kll_sketch<T, C, S, A>::kll_sketch(kll_sketch&& other) noexcept:
82
+ template<typename T, typename C, typename A>
83
+ kll_sketch<T, C, A>::kll_sketch(kll_sketch&& other) noexcept:
84
+ comparator_(std::move(other.comparator_)),
80
85
  allocator_(std::move(other.allocator_)),
81
86
  k_(other.k_),
82
87
  m_(other.m_),
83
88
  min_k_(other.min_k_),
84
- n_(other.n_),
85
89
  num_levels_(other.num_levels_),
90
+ is_level_zero_sorted_(other.is_level_zero_sorted_),
91
+ n_(other.n_),
86
92
  levels_(std::move(other.levels_)),
87
93
  items_(other.items_),
88
94
  items_size_(other.items_size_),
89
- min_value_(other.min_value_),
90
- max_value_(other.max_value_),
91
- is_level_zero_sorted_(other.is_level_zero_sorted_)
95
+ min_item_(other.min_item_),
96
+ max_item_(other.max_item_),
97
+ sorted_view_(nullptr)
92
98
  {
93
99
  other.items_ = nullptr;
94
- other.min_value_ = nullptr;
95
- other.max_value_ = nullptr;
100
+ other.min_item_ = nullptr;
101
+ other.max_item_ = nullptr;
96
102
  }
97
103
 
98
- template<typename T, typename C, typename S, typename A>
99
- kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(const kll_sketch& other) {
100
- kll_sketch<T, C, S, A> copy(other);
104
+ template<typename T, typename C, typename A>
105
+ kll_sketch<T, C, A>& kll_sketch<T, C, A>::operator=(const kll_sketch& other) {
106
+ kll_sketch copy(other);
107
+ std::swap(comparator_, copy.comparator_);
101
108
  std::swap(allocator_, copy.allocator_);
102
109
  std::swap(k_, copy.k_);
103
110
  std::swap(m_, copy.m_);
104
111
  std::swap(min_k_, copy.min_k_);
105
- std::swap(n_, copy.n_);
106
112
  std::swap(num_levels_, copy.num_levels_);
113
+ std::swap(is_level_zero_sorted_, copy.is_level_zero_sorted_);
114
+ std::swap(n_, copy.n_);
107
115
  std::swap(levels_, copy.levels_);
108
116
  std::swap(items_, copy.items_);
109
117
  std::swap(items_size_, copy.items_size_);
110
- std::swap(min_value_, copy.min_value_);
111
- std::swap(max_value_, copy.max_value_);
112
- std::swap(is_level_zero_sorted_, copy.is_level_zero_sorted_);
118
+ std::swap(min_item_, copy.min_item_);
119
+ std::swap(max_item_, copy.max_item_);
120
+ reset_sorted_view();
113
121
  return *this;
114
122
  }
115
123
 
116
- template<typename T, typename C, typename S, typename A>
117
- kll_sketch<T, C, S, A>& kll_sketch<T, C, S, A>::operator=(kll_sketch&& other) {
124
+ template<typename T, typename C, typename A>
125
+ kll_sketch<T, C, A>& kll_sketch<T, C, A>::operator=(kll_sketch&& other) {
126
+ std::swap(comparator_, other.comparator_);
118
127
  std::swap(allocator_, other.allocator_);
119
128
  std::swap(k_, other.k_);
120
129
  std::swap(m_, other.m_);
121
130
  std::swap(min_k_, other.min_k_);
122
- std::swap(n_, other.n_);
123
131
  std::swap(num_levels_, other.num_levels_);
132
+ std::swap(is_level_zero_sorted_, other.is_level_zero_sorted_);
133
+ std::swap(n_, other.n_);
124
134
  std::swap(levels_, other.levels_);
125
135
  std::swap(items_, other.items_);
126
136
  std::swap(items_size_, other.items_size_);
127
- std::swap(min_value_, other.min_value_);
128
- std::swap(max_value_, other.max_value_);
129
- std::swap(is_level_zero_sorted_, other.is_level_zero_sorted_);
137
+ std::swap(min_item_, other.min_item_);
138
+ std::swap(max_item_, other.max_item_);
139
+ reset_sorted_view();
130
140
  return *this;
131
141
  }
132
142
 
133
- template<typename T, typename C, typename S, typename A>
134
- kll_sketch<T, C, S, A>::~kll_sketch() {
143
+ template<typename T, typename C, typename A>
144
+ kll_sketch<T, C, A>::~kll_sketch() {
135
145
  if (items_ != nullptr) {
136
146
  const uint32_t begin = levels_[0];
137
147
  const uint32_t end = levels_[num_levels_];
138
148
  for (uint32_t i = begin; i < end; i++) items_[i].~T();
139
149
  allocator_.deallocate(items_, items_size_);
140
150
  }
141
- if (min_value_ != nullptr) {
142
- min_value_->~T();
143
- allocator_.deallocate(min_value_, 1);
151
+ if (min_item_ != nullptr) {
152
+ min_item_->~T();
153
+ allocator_.deallocate(min_item_, 1);
144
154
  }
145
- if (max_value_ != nullptr) {
146
- max_value_->~T();
147
- allocator_.deallocate(max_value_, 1);
155
+ if (max_item_ != nullptr) {
156
+ max_item_->~T();
157
+ allocator_.deallocate(max_item_, 1);
148
158
  }
159
+ reset_sorted_view();
149
160
  }
150
161
 
151
- template<typename T, typename C, typename S, typename A>
152
- template<typename TT, typename CC, typename SS, typename AA>
153
- kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator):
162
+ template<typename T, typename C, typename A>
163
+ template<typename TT, typename CC, typename AA>
164
+ kll_sketch<T, C, A>::kll_sketch(const kll_sketch<TT, CC, AA>& other, const C& comparator, const A& allocator):
165
+ comparator_(comparator),
154
166
  allocator_(allocator),
155
167
  k_(other.k_),
156
168
  m_(other.m_),
157
169
  min_k_(other.min_k_),
158
- n_(other.n_),
159
170
  num_levels_(other.num_levels_),
171
+ is_level_zero_sorted_(other.is_level_zero_sorted_),
172
+ n_(other.n_),
160
173
  levels_(other.levels_, allocator_),
161
174
  items_(nullptr),
162
175
  items_size_(other.items_size_),
163
- min_value_(nullptr),
164
- max_value_(nullptr),
165
- is_level_zero_sorted_(other.is_level_zero_sorted_)
176
+ min_item_(nullptr),
177
+ max_item_(nullptr),
178
+ sorted_view_(nullptr)
166
179
  {
167
180
  static_assert(
168
181
  std::is_constructible<T, TT>::value,
@@ -170,52 +183,53 @@ is_level_zero_sorted_(other.is_level_zero_sorted_)
170
183
  );
171
184
  items_ = allocator_.allocate(items_size_);
172
185
  for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
173
- if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
174
- if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
186
+ if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
187
+ if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
175
188
  check_sorting();
176
189
  }
177
190
 
178
- template<typename T, typename C, typename S, typename A>
191
+ template<typename T, typename C, typename A>
179
192
  template<typename FwdT>
180
- void kll_sketch<T, C, S, A>::update(FwdT&& value) {
181
- if (!check_update_value(value)) { return; }
182
- update_min_max(value);
193
+ void kll_sketch<T, C, A>::update(FwdT&& item) {
194
+ if (!check_update_item(item)) { return; }
195
+ update_min_max(item);
183
196
  const uint32_t index = internal_update();
184
- new (&items_[index]) T(std::forward<FwdT>(value));
197
+ new (&items_[index]) T(std::forward<FwdT>(item));
198
+ reset_sorted_view();
185
199
  }
186
200
 
187
- template<typename T, typename C, typename S, typename A>
188
- void kll_sketch<T, C, S, A>::update_min_max(const T& value) {
201
+ template<typename T, typename C, typename A>
202
+ void kll_sketch<T, C, A>::update_min_max(const T& item) {
189
203
  if (is_empty()) {
190
- min_value_ = new (allocator_.allocate(1)) T(value);
191
- max_value_ = new (allocator_.allocate(1)) T(value);
204
+ min_item_ = new (allocator_.allocate(1)) T(item);
205
+ max_item_ = new (allocator_.allocate(1)) T(item);
192
206
  } else {
193
- if (C()(value, *min_value_)) *min_value_ = value;
194
- if (C()(*max_value_, value)) *max_value_ = value;
207
+ if (comparator_(item, *min_item_)) *min_item_ = item;
208
+ if (comparator_(*max_item_, item)) *max_item_ = item;
195
209
  }
196
210
  }
197
211
 
198
- template<typename T, typename C, typename S, typename A>
199
- uint32_t kll_sketch<T, C, S, A>::internal_update() {
212
+ template<typename T, typename C, typename A>
213
+ uint32_t kll_sketch<T, C, A>::internal_update() {
200
214
  if (levels_[0] == 0) compress_while_updating();
201
215
  n_++;
202
216
  is_level_zero_sorted_ = false;
203
217
  return --levels_[0];
204
218
  }
205
219
 
206
- template<typename T, typename C, typename S, typename A>
220
+ template<typename T, typename C, typename A>
207
221
  template<typename FwdSk>
208
- void kll_sketch<T, C, S, A>::merge(FwdSk&& other) {
222
+ void kll_sketch<T, C, A>::merge(FwdSk&& other) {
209
223
  if (other.is_empty()) return;
210
224
  if (m_ != other.m_) {
211
225
  throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
212
226
  }
213
227
  if (is_empty()) {
214
- min_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_value_));
215
- max_value_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_value_));
228
+ min_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_item_));
229
+ max_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_item_));
216
230
  } else {
217
- if (C()(*other.min_value_, *min_value_)) *min_value_ = conditional_forward<FwdSk>(*other.min_value_);
218
- if (C()(*max_value_, *other.max_value_)) *max_value_ = conditional_forward<FwdSk>(*other.max_value_);
231
+ if (comparator_(*other.min_item_, *min_item_)) *min_item_ = conditional_forward<FwdSk>(*other.min_item_);
232
+ if (comparator_(*max_item_, *other.max_item_)) *max_item_ = conditional_forward<FwdSk>(*other.max_item_);
219
233
  }
220
234
  const uint64_t final_n = n_ + other.n_;
221
235
  for (uint32_t i = other.levels_[0]; i < other.levels_[1]; i++) {
@@ -226,149 +240,133 @@ void kll_sketch<T, C, S, A>::merge(FwdSk&& other) {
226
240
  n_ = final_n;
227
241
  if (other.is_estimation_mode()) min_k_ = std::min(min_k_, other.min_k_);
228
242
  assert_correct_total_weight();
243
+ reset_sorted_view();
229
244
  }
230
245
 
231
- template<typename T, typename C, typename S, typename A>
232
- bool kll_sketch<T, C, S, A>::is_empty() const {
246
+ template<typename T, typename C, typename A>
247
+ bool kll_sketch<T, C, A>::is_empty() const {
233
248
  return n_ == 0;
234
249
  }
235
250
 
236
- template<typename T, typename C, typename S, typename A>
237
- uint16_t kll_sketch<T, C, S, A>::get_k() const {
251
+ template<typename T, typename C, typename A>
252
+ uint16_t kll_sketch<T, C, A>::get_k() const {
238
253
  return k_;
239
254
  }
240
255
 
241
- template<typename T, typename C, typename S, typename A>
242
- uint64_t kll_sketch<T, C, S, A>::get_n() const {
256
+ template<typename T, typename C, typename A>
257
+ uint64_t kll_sketch<T, C, A>::get_n() const {
243
258
  return n_;
244
259
  }
245
260
 
246
- template<typename T, typename C, typename S, typename A>
247
- uint32_t kll_sketch<T, C, S, A>::get_num_retained() const {
261
+ template<typename T, typename C, typename A>
262
+ uint32_t kll_sketch<T, C, A>::get_num_retained() const {
248
263
  return levels_[num_levels_] - levels_[0];
249
264
  }
250
265
 
251
- template<typename T, typename C, typename S, typename A>
252
- bool kll_sketch<T, C, S, A>::is_estimation_mode() const {
266
+ template<typename T, typename C, typename A>
267
+ bool kll_sketch<T, C, A>::is_estimation_mode() const {
253
268
  return num_levels_ > 1;
254
269
  }
255
270
 
256
- template<typename T, typename C, typename S, typename A>
257
- T kll_sketch<T, C, S, A>::get_min_value() const {
258
- if (is_empty()) return get_invalid_value();
259
- return *min_value_;
271
+ template<typename T, typename C, typename A>
272
+ T kll_sketch<T, C, A>::get_min_item() const {
273
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
274
+ return *min_item_;
260
275
  }
261
276
 
262
- template<typename T, typename C, typename S, typename A>
263
- T kll_sketch<T, C, S, A>::get_max_value() const {
264
- if (is_empty()) return get_invalid_value();
265
- return *max_value_;
277
+ template<typename T, typename C, typename A>
278
+ T kll_sketch<T, C, A>::get_max_item() const {
279
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
280
+ return *max_item_;
266
281
  }
267
282
 
268
- template<typename T, typename C, typename S, typename A>
269
- C kll_sketch<T, C, S, A>::get_comparator() const {
270
- return C();
283
+ template<typename T, typename C, typename A>
284
+ C kll_sketch<T, C, A>::get_comparator() const {
285
+ return comparator_;
271
286
  }
272
287
 
273
- template<typename T, typename C, typename S, typename A>
274
- template<bool inclusive>
275
- auto kll_sketch<T, C, S, A>::get_quantile(double rank) const -> quantile_return_type {
276
- if (is_empty()) return get_invalid_value();
277
- if (rank == 0.0) return *min_value_;
278
- if (rank == 1.0) return *max_value_;
288
+ template<typename T, typename C, typename A>
289
+ A kll_sketch<T, C, A>::get_allocator() const {
290
+ return allocator_;
291
+ }
292
+
293
+ template<typename T, typename C, typename A>
294
+ double kll_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
295
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
296
+ setup_sorted_view();
297
+ return sorted_view_->get_rank(item, inclusive);
298
+ }
299
+
300
+ template<typename T, typename C, typename A>
301
+ auto kll_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
302
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
303
+ setup_sorted_view();
304
+ return sorted_view_->get_PMF(split_points, size, inclusive);
305
+ }
306
+
307
+ template<typename T, typename C, typename A>
308
+ auto kll_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
309
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
310
+ setup_sorted_view();
311
+ return sorted_view_->get_CDF(split_points, size, inclusive);
312
+ }
313
+
314
+ template<typename T, typename C, typename A>
315
+ auto kll_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
316
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
279
317
  if ((rank < 0.0) || (rank > 1.0)) {
280
- throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
318
+ throw std::invalid_argument("normalized rank cannot be less than zero or greater than 1.0");
281
319
  }
282
320
  // may have a side effect of sorting level zero if needed
283
- return get_sorted_view<inclusive>(true).get_quantile(rank);
321
+ setup_sorted_view();
322
+ return sorted_view_->get_quantile(rank, inclusive);
284
323
  }
285
324
 
286
- template<typename T, typename C, typename S, typename A>
287
- template<bool inclusive>
288
- std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* ranks, uint32_t size) const {
325
+ template<typename T, typename C, typename A>
326
+ std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
327
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
289
328
  std::vector<T, A> quantiles(allocator_);
290
- if (is_empty()) return quantiles;
291
329
  quantiles.reserve(size);
292
330
 
293
331
  // may have a side effect of sorting level zero if needed
294
- auto view = get_sorted_view<inclusive>(true);
332
+ setup_sorted_view();
295
333
 
296
334
  for (uint32_t i = 0; i < size; i++) {
297
335
  const double rank = ranks[i];
298
336
  if ((rank < 0.0) || (rank > 1.0)) {
299
- throw std::invalid_argument("Fraction cannot be less than zero or greater than 1.0");
300
- }
301
- else if (rank == 0.0) quantiles.push_back(*min_value_);
302
- else if (rank == 1.0) quantiles.push_back(*max_value_);
303
- else {
304
- quantiles.push_back(view.get_quantile(rank));
337
+ throw std::invalid_argument("normalized rank cannot be less than 0 or greater than 1");
305
338
  }
339
+ quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
306
340
  }
307
341
  return quantiles;
308
342
  }
309
343
 
310
- template<typename T, typename C, typename S, typename A>
311
- template<bool inclusive>
312
- std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(uint32_t num) const {
313
- if (is_empty()) return std::vector<T, A>(allocator_);
344
+ template<typename T, typename C, typename A>
345
+ std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
346
+ if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
314
347
  if (num == 0) {
315
348
  throw std::invalid_argument("num must be > 0");
316
349
  }
317
- vector_d<A> fractions(num, 0, allocator_);
318
- fractions[0] = 0.0;
350
+ vector_double ranks(num, 0, allocator_);
351
+ ranks[0] = 0.0;
319
352
  for (size_t i = 1; i < num; i++) {
320
- fractions[i] = static_cast<double>(i) / (num - 1);
353
+ ranks[i] = static_cast<double>(i) / (num - 1);
321
354
  }
322
355
  if (num > 1) {
323
- fractions[num - 1] = 1.0;
324
- }
325
- return get_quantiles<inclusive>(fractions.data(), num);
326
- }
327
-
328
- template<typename T, typename C, typename S, typename A>
329
- template<bool inclusive>
330
- double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
331
- if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
332
- uint8_t level = 0;
333
- uint64_t weight = 1;
334
- uint64_t total = 0;
335
- while (level < num_levels_) {
336
- const auto from_index = levels_[level];
337
- const auto to_index = levels_[level + 1]; // exclusive
338
- for (uint32_t i = from_index; i < to_index; i++) {
339
- if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
340
- total += weight;
341
- } else if ((level > 0) || is_level_zero_sorted_) {
342
- break; // levels above 0 are sorted, no point comparing further
343
- }
344
- }
345
- level++;
346
- weight *= 2;
356
+ ranks[num - 1] = 1.0;
347
357
  }
348
- return (double) total / n_;
349
- }
350
-
351
- template<typename T, typename C, typename S, typename A>
352
- template<bool inclusive>
353
- vector_d<A> kll_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const {
354
- return get_PMF_or_CDF<inclusive>(split_points, size, false);
355
- }
356
-
357
- template<typename T, typename C, typename S, typename A>
358
- template<bool inclusive>
359
- vector_d<A> kll_sketch<T, C, S, A>::get_CDF(const T* split_points, uint32_t size) const {
360
- return get_PMF_or_CDF<inclusive>(split_points, size, true);
358
+ return get_quantiles(ranks.data(), num, inclusive);
361
359
  }
362
360
 
363
- template<typename T, typename C, typename S, typename A>
364
- double kll_sketch<T, C, S, A>::get_normalized_rank_error(bool pmf) const {
361
+ template<typename T, typename C, typename A>
362
+ double kll_sketch<T, C, A>::get_normalized_rank_error(bool pmf) const {
365
363
  return get_normalized_rank_error(min_k_, pmf);
366
364
  }
367
365
 
368
366
  // implementation for fixed-size arithmetic types (integral and floating point)
369
- template<typename T, typename C, typename S, typename A>
367
+ template<typename T, typename C, typename A>
370
368
  template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
371
- size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe&) const {
369
+ size_t kll_sketch<T, C, A>::get_serialized_size_bytes(const SerDe&) const {
372
370
  if (is_empty()) { return EMPTY_SIZE_BYTES; }
373
371
  if (num_levels_ == 1 && get_num_retained() == 1) {
374
372
  return DATA_START_SINGLE_ITEM + sizeof(TT);
@@ -378,25 +376,25 @@ size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe&) const {
378
376
  }
379
377
 
380
378
  // implementation for all other types
381
- template<typename T, typename C, typename S, typename A>
379
+ template<typename T, typename C, typename A>
382
380
  template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
383
- size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
381
+ size_t kll_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
384
382
  if (is_empty()) { return EMPTY_SIZE_BYTES; }
385
383
  if (num_levels_ == 1 && get_num_retained() == 1) {
386
384
  return DATA_START_SINGLE_ITEM + sd.size_of_item(items_[levels_[0]]);
387
385
  }
388
386
  // the last integer in the levels_ array is not serialized because it can be derived
389
387
  size_t size = DATA_START + num_levels_ * sizeof(uint32_t);
390
- size += sd.size_of_item(*min_value_);
391
- size += sd.size_of_item(*max_value_);
388
+ size += sd.size_of_item(*min_item_);
389
+ size += sd.size_of_item(*max_item_);
392
390
  for (auto it: *this) size += sd.size_of_item(it.first);
393
391
  return size;
394
392
  }
395
393
 
396
394
  // implementation for fixed-size arithmetic types (integral and floating point)
397
- template<typename T, typename C, typename S, typename A>
395
+ template<typename T, typename C, typename A>
398
396
  template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
399
- size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n) {
397
+ size_t kll_sketch<T, C, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n) {
400
398
  const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
401
399
  const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
402
400
  // the last integer in the levels_ array is not serialized because it can be derived
@@ -404,18 +402,18 @@ size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_
404
402
  }
405
403
 
406
404
  // implementation for all other types
407
- template<typename T, typename C, typename S, typename A>
405
+ template<typename T, typename C, typename A>
408
406
  template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
409
- size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes) {
407
+ size_t kll_sketch<T, C, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes) {
410
408
  const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
411
409
  const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
412
410
  // the last integer in the levels_ array is not serialized because it can be derived
413
411
  return DATA_START + num_levels * sizeof(uint32_t) + (max_num_retained + 2) * max_item_size_bytes;
414
412
  }
415
413
 
416
- template<typename T, typename C, typename S, typename A>
414
+ template<typename T, typename C, typename A>
417
415
  template<typename SerDe>
418
- void kll_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
416
+ void kll_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
419
417
  const bool is_single_item = n_ == 1;
420
418
  const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
421
419
  write(os, preamble_ints);
@@ -440,18 +438,18 @@ void kll_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
440
438
  write(os, num_levels_);
441
439
  write(os, unused);
442
440
  write(os, levels_.data(), sizeof(levels_[0]) * num_levels_);
443
- sd.serialize(os, min_value_, 1);
444
- sd.serialize(os, max_value_, 1);
441
+ sd.serialize(os, min_item_, 1);
442
+ sd.serialize(os, max_item_, 1);
445
443
  }
446
444
  sd.serialize(os, &items_[levels_[0]], get_num_retained());
447
445
  }
448
446
 
449
- template<typename T, typename C, typename S, typename A>
447
+ template<typename T, typename C, typename A>
450
448
  template<typename SerDe>
451
- vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
449
+ auto kll_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
452
450
  const bool is_single_item = n_ == 1;
453
451
  const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
454
- vector_u8<A> bytes(size, 0, allocator_);
452
+ vector_bytes bytes(size, 0, allocator_);
455
453
  uint8_t* ptr = bytes.data() + header_size_bytes;
456
454
  const uint8_t* end_ptr = ptr + size;
457
455
  const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
@@ -476,25 +474,22 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const
476
474
  ptr += copy_to_mem(num_levels_, ptr);
477
475
  ptr += sizeof(uint8_t); // unused
478
476
  ptr += copy_to_mem(levels_.data(), ptr, sizeof(levels_[0]) * num_levels_);
479
- ptr += sd.serialize(ptr, end_ptr - ptr, min_value_, 1);
480
- ptr += sd.serialize(ptr, end_ptr - ptr, max_value_, 1);
477
+ ptr += sd.serialize(ptr, end_ptr - ptr, min_item_, 1);
478
+ ptr += sd.serialize(ptr, end_ptr - ptr, max_item_, 1);
481
479
  }
482
480
  const size_t bytes_remaining = end_ptr - ptr;
483
481
  ptr += sd.serialize(ptr, bytes_remaining, &items_[levels_[0]], get_num_retained());
484
482
  }
485
483
  const size_t delta = ptr - bytes.data();
486
- if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
484
+ if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta)
485
+ + " != " + std::to_string(size));
487
486
  return bytes;
488
487
  }
489
488
 
490
- template<typename T, typename C, typename S, typename A>
491
- kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
492
- return deserialize(is, S(), allocator);
493
- }
494
-
495
- template<typename T, typename C, typename S, typename A>
489
+ template<typename T, typename C, typename A>
496
490
  template<typename SerDe>
497
- kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
491
+ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(std::istream& is, const SerDe& sd,
492
+ const C& comparator, const A& allocator) {
498
493
  const auto preamble_ints = read<uint8_t>(is);
499
494
  const auto serial_version = read<uint8_t>(is);
500
495
  const auto family_id = read<uint8_t>(is);
@@ -510,7 +505,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
510
505
 
511
506
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
512
507
  const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
513
- if (is_empty) return kll_sketch(k, allocator);
508
+ if (is_empty) return kll_sketch(k, comparator, allocator);
514
509
 
515
510
  uint64_t n;
516
511
  uint16_t min_k;
@@ -526,7 +521,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
526
521
  num_levels = read<uint8_t>(is);
527
522
  read<uint8_t>(is); // skip unused byte
528
523
  }
529
- vector_u32<A> levels(num_levels + 1, 0, allocator);
524
+ vector_u32 levels(num_levels + 1, 0, allocator);
530
525
  const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
531
526
  if (is_single_item) {
532
527
  levels[0] = capacity - 1;
@@ -537,17 +532,17 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
537
532
  levels[num_levels] = capacity;
538
533
  A alloc(allocator);
539
534
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
540
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
541
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
542
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
543
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
535
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
536
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
537
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
538
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
544
539
  if (!is_single_item) {
545
- sd.deserialize(is, min_value_buffer.get(), 1);
540
+ sd.deserialize(is, min_item_buffer.get(), 1);
546
541
  // serde call did not throw, repackage with destructor
547
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
548
- sd.deserialize(is, max_value_buffer.get(), 1);
542
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
543
+ sd.deserialize(is, max_item_buffer.get(), 1);
549
544
  // serde call did not throw, repackage with destructor
550
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
545
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
551
546
  }
552
547
  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
553
548
  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
@@ -557,27 +552,23 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
557
552
  std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
558
553
  const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
559
554
  if (is_single_item) {
560
- new (min_value_buffer.get()) T(items.get()[levels[0]]);
555
+ new (min_item_buffer.get()) T(items.get()[levels[0]]);
561
556
  // copy did not throw, repackage with destructor
562
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
563
- new (max_value_buffer.get()) T(items.get()[levels[0]]);
557
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
558
+ new (max_item_buffer.get()) T(items.get()[levels[0]]);
564
559
  // copy did not throw, repackage with destructor
565
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
560
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
566
561
  }
567
562
  if (!is.good())
568
563
  throw std::runtime_error("error reading from std::istream");
569
564
  return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
570
- std::move(min_value), std::move(max_value), is_level_zero_sorted);
571
- }
572
-
573
- template<typename T, typename C, typename S, typename A>
574
- kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
575
- return deserialize(bytes, size, S(), allocator);
565
+ std::move(min_item), std::move(max_item), is_level_zero_sorted, comparator);
576
566
  }
577
567
 
578
- template<typename T, typename C, typename S, typename A>
568
+ template<typename T, typename C, typename A>
579
569
  template<typename SerDe>
580
- kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
570
+ kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& sd,
571
+ const C& comparator, const A& allocator) {
581
572
  ensure_minimum_memory(size, 8);
582
573
  const char* ptr = static_cast<const char*>(bytes);
583
574
  uint8_t preamble_ints;
@@ -601,7 +592,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
601
592
  ensure_minimum_memory(size, preamble_ints * sizeof(uint32_t));
602
593
 
603
594
  const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
604
- if (is_empty) return kll_sketch<T, C, S, A>(k, allocator);
595
+ if (is_empty) return kll_sketch(k, comparator, allocator);
605
596
 
606
597
  uint64_t n;
607
598
  uint16_t min_k;
@@ -618,7 +609,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
618
609
  ptr += copy_from_mem(ptr, num_levels);
619
610
  ptr += sizeof(uint8_t); // skip unused byte
620
611
  }
621
- vector_u32<A> levels(num_levels + 1, 0, allocator);
612
+ vector_u32 levels(num_levels + 1, 0, allocator);
622
613
  const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
623
614
  if (is_single_item) {
624
615
  levels[0] = capacity - 1;
@@ -629,17 +620,17 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
629
620
  levels[num_levels] = capacity;
630
621
  A alloc(allocator);
631
622
  auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
632
- std::unique_ptr<T, decltype(item_buffer_deleter)> min_value_buffer(alloc.allocate(1), item_buffer_deleter);
633
- std::unique_ptr<T, decltype(item_buffer_deleter)> max_value_buffer(alloc.allocate(1), item_buffer_deleter);
634
- std::unique_ptr<T, item_deleter> min_value(nullptr, item_deleter(allocator));
635
- std::unique_ptr<T, item_deleter> max_value(nullptr, item_deleter(allocator));
623
+ std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
624
+ std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
625
+ std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
626
+ std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
636
627
  if (!is_single_item) {
637
- ptr += sd.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
628
+ ptr += sd.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
638
629
  // serde call did not throw, repackage with destructor
639
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
640
- ptr += sd.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
630
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
631
+ ptr += sd.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
641
632
  // serde call did not throw, repackage with destructor
642
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
633
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
643
634
  }
644
635
  auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
645
636
  std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
@@ -651,15 +642,15 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
651
642
  if (delta != size) throw std::logic_error("deserialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
652
643
  const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
653
644
  if (is_single_item) {
654
- new (min_value_buffer.get()) T(items.get()[levels[0]]);
645
+ new (min_item_buffer.get()) T(items.get()[levels[0]]);
655
646
  // copy did not throw, repackage with destructor
656
- min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
657
- new (max_value_buffer.get()) T(items.get()[levels[0]]);
647
+ min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
648
+ new (max_item_buffer.get()) T(items.get()[levels[0]]);
658
649
  // copy did not throw, repackage with destructor
659
- max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
650
+ max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
660
651
  }
661
652
  return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
662
- std::move(min_value), std::move(max_value), is_level_zero_sorted);
653
+ std::move(min_item), std::move(max_item), is_level_zero_sorted, comparator);
663
654
  }
664
655
 
665
656
  /*
@@ -669,36 +660,38 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
669
660
  * Otherwise, it is the "single-sided" normalized rank error for all the other queries.
670
661
  * Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials
671
662
  */
672
- template<typename T, typename C, typename S, typename A>
673
- double kll_sketch<T, C, S, A>::get_normalized_rank_error(uint16_t k, bool pmf) {
663
+ template<typename T, typename C, typename A>
664
+ double kll_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool pmf) {
674
665
  return pmf
675
666
  ? 2.446 / pow(k, 0.9433)
676
667
  : 2.296 / pow(k, 0.9723);
677
668
  }
678
669
 
679
670
  // for deserialization
680
- template<typename T, typename C, typename S, typename A>
681
- kll_sketch<T, C, S, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32<A>&& levels,
682
- std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_value,
683
- std::unique_ptr<T, item_deleter> max_value, bool is_level_zero_sorted):
671
+ template<typename T, typename C, typename A>
672
+ kll_sketch<T, C, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32&& levels,
673
+ std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_item,
674
+ std::unique_ptr<T, item_deleter> max_item, bool is_level_zero_sorted, const C& comparator):
675
+ comparator_(comparator),
684
676
  allocator_(levels.get_allocator()),
685
677
  k_(k),
686
678
  m_(DEFAULT_M),
687
679
  min_k_(min_k),
688
- n_(n),
689
680
  num_levels_(num_levels),
681
+ is_level_zero_sorted_(is_level_zero_sorted),
682
+ n_(n),
690
683
  levels_(std::move(levels)),
691
684
  items_(items.release()),
692
685
  items_size_(items_size),
693
- min_value_(min_value.release()),
694
- max_value_(max_value.release()),
695
- is_level_zero_sorted_(is_level_zero_sorted)
686
+ min_item_(min_item.release()),
687
+ max_item_(max_item.release()),
688
+ sorted_view_(nullptr)
696
689
  {}
697
690
 
698
691
  // The following code is only valid in the special case of exactly reaching capacity while updating.
699
692
  // It cannot be used while merging, while reducing k, or anything else.
700
- template<typename T, typename C, typename S, typename A>
701
- void kll_sketch<T, C, S, A>::compress_while_updating(void) {
693
+ template<typename T, typename C, typename A>
694
+ void kll_sketch<T, C, A>::compress_while_updating(void) {
702
695
  const uint8_t level = find_level_to_compact();
703
696
 
704
697
  // It is important to add the new top level right here. Be aware that this operation
@@ -722,7 +715,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
722
715
  // level zero might not be sorted, so we must sort it if we wish to compact it
723
716
  // sort_level_zero() is not used here because of the adjustment for odd number of items
724
717
  if ((level == 0) && !is_level_zero_sorted_) {
725
- std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, C());
718
+ std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, comparator_);
726
719
  }
727
720
  if (pop_above == 0) {
728
721
  kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
@@ -751,8 +744,8 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
751
744
  for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
752
745
  }
753
746
 
754
- template<typename T, typename C, typename S, typename A>
755
- uint8_t kll_sketch<T, C, S, A>::find_level_to_compact() const {
747
+ template<typename T, typename C, typename A>
748
+ uint8_t kll_sketch<T, C, A>::find_level_to_compact() const {
756
749
  uint8_t level = 0;
757
750
  while (true) {
758
751
  if (level >= num_levels_) throw std::logic_error("capacity calculation error");
@@ -765,8 +758,8 @@ uint8_t kll_sketch<T, C, S, A>::find_level_to_compact() const {
765
758
  }
766
759
  }
767
760
 
768
- template<typename T, typename C, typename S, typename A>
769
- void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
761
+ template<typename T, typename C, typename A>
762
+ void kll_sketch<T, C, A>::add_empty_top_level_to_completely_full_sketch() {
770
763
  const uint32_t cur_total_cap = levels_[num_levels_];
771
764
 
772
765
  // make sure that we are following a certain growth scheme
@@ -800,124 +793,50 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
800
793
  levels_[num_levels_] = new_total_cap; // initialize the new "extra" index at the top
801
794
  }
802
795
 
803
- template<typename T, typename C, typename S, typename A>
804
- void kll_sketch<T, C, S, A>::sort_level_zero() {
796
+ template<typename T, typename C, typename A>
797
+ void kll_sketch<T, C, A>::sort_level_zero() {
805
798
  if (!is_level_zero_sorted_) {
806
- std::sort(items_ + levels_[0], items_ + levels_[1], C());
799
+ std::sort(items_ + levels_[0], items_ + levels_[1], comparator_);
807
800
  is_level_zero_sorted_ = true;
808
801
  }
809
802
  }
810
803
 
811
- template<typename T, typename C, typename S, typename A>
812
- void kll_sketch<T, C, S, A>::check_sorting() const {
804
+ template<typename T, typename C, typename A>
805
+ void kll_sketch<T, C, A>::check_sorting() const {
813
806
  // not checking level 0
814
807
  for (uint8_t level = 1; level < num_levels_; ++level) {
815
808
  const auto from = items_ + levels_[level];
816
809
  const auto to = items_ + levels_[level + 1];
817
- if (!std::is_sorted(from, to, C())) {
810
+ if (!std::is_sorted(from, to, comparator_)) {
818
811
  throw std::logic_error("levels must be sorted");
819
812
  }
820
813
  }
821
814
  }
822
815
 
823
- template<typename T, typename C, typename S, typename A>
824
- template<bool inclusive>
825
- quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
816
+ template<typename T, typename C, typename A>
817
+ quantiles_sorted_view<T, C, A> kll_sketch<T, C, A>::get_sorted_view() const {
826
818
  const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
827
- quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
819
+ quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
828
820
  for (uint8_t level = 0; level < num_levels_; ++level) {
829
821
  const auto from = items_ + levels_[level];
830
822
  const auto to = items_ + levels_[level + 1]; // exclusive
831
823
  view.add(from, to, 1 << level);
832
824
  }
833
- if (cumulative) view.template convert_to_cummulative<inclusive>();
825
+ view.convert_to_cummulative();
834
826
  return view;
835
827
  }
836
828
 
837
- template<typename T, typename C, typename S, typename A>
838
- template<bool inclusive>
839
- vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const {
840
- if (is_empty()) return vector_d<A>(allocator_);
841
- kll_helper::validate_values<T, C>(split_points, size);
842
- vector_d<A> buckets(size + 1, 0, allocator_);
843
- uint8_t level = 0;
844
- uint64_t weight = 1;
845
- while (level < num_levels_) {
846
- const auto from_index = levels_[level];
847
- const auto to_index = levels_[level + 1]; // exclusive
848
- if ((level == 0) && !is_level_zero_sorted_) {
849
- increment_buckets_unsorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
850
- } else {
851
- increment_buckets_sorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
852
- }
853
- level++;
854
- weight *= 2;
855
- }
856
- // normalize and, if CDF, convert to cumulative
857
- if (is_CDF) {
858
- double subtotal = 0;
859
- for (uint32_t i = 0; i <= size; i++) {
860
- subtotal += buckets[i];
861
- buckets[i] = subtotal / n_;
862
- }
863
- } else {
864
- for (uint32_t i = 0; i <= size; i++) {
865
- buckets[i] /= n_;
866
- }
867
- }
868
- return buckets;
869
- }
870
-
871
- template<typename T, typename C, typename S, typename A>
872
- template<bool inclusive>
873
- void kll_sketch<T, C, S, A>::increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
874
- const T* split_points, uint32_t size, double* buckets) const
875
- {
876
- for (uint32_t i = from_index; i < to_index; i++) {
877
- uint32_t j;
878
- for (j = 0; j < size; j++) {
879
- if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
880
- break;
881
- }
882
- }
883
- buckets[j] += weight;
884
- }
885
- }
886
-
887
- template<typename T, typename C, typename S, typename A>
888
- template<bool inclusive>
889
- void kll_sketch<T, C, S, A>::increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
890
- const T* split_points, uint32_t size, double* buckets) const
891
- {
892
- uint32_t i = from_index;
893
- uint32_t j = 0;
894
- while ((i < to_index) && (j < size)) {
895
- if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
896
- buckets[j] += weight; // this sample goes into this bucket
897
- i++; // move on to next sample and see whether it also goes into this bucket
898
- } else {
899
- j++; // no more samples for this bucket
900
- }
901
- }
902
- // now either i == to_index (we are out of samples), or
903
- // j == size (we are out of buckets, but there are more samples remaining)
904
- // we only need to do something in the latter case
905
- if (j == size) {
906
- buckets[j] += weight * (to_index - i);
907
- }
908
- }
909
-
910
- template<typename T, typename C, typename S, typename A>
829
+ template<typename T, typename C, typename A>
911
830
  template<typename O>
912
- void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
831
+ void kll_sketch<T, C, A>::merge_higher_levels(O&& other, uint64_t final_n) {
913
832
  const uint32_t tmp_num_items = get_num_retained() + other.get_num_retained_above_level_zero();
914
833
  A alloc(allocator_);
915
834
  auto tmp_items_deleter = [tmp_num_items, &alloc](T* ptr) { alloc.deallocate(ptr, tmp_num_items); }; // no destructor needed
916
835
  const std::unique_ptr<T, decltype(tmp_items_deleter)> workbuf(allocator_.allocate(tmp_num_items), tmp_items_deleter);
917
836
  const uint8_t ub = kll_helper::ub_on_num_levels(final_n);
918
837
  const size_t work_levels_size = ub + 2; // ub+1 does not work
919
- vector_u32<A> worklevels(work_levels_size, 0, allocator_);
920
- vector_u32<A> outlevels(work_levels_size, 0, allocator_);
838
+ vector_u32 worklevels(work_levels_size, 0, allocator_);
839
+ vector_u32 outlevels(work_levels_size, 0, allocator_);
921
840
 
922
841
  const uint8_t provisional_num_levels = std::max(num_levels_, other.num_levels_);
923
842
 
@@ -950,9 +869,9 @@ void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
950
869
  }
951
870
 
952
871
  // this leaves items_ uninitialized (all objects moved out and destroyed)
953
- template<typename T, typename C, typename S, typename A>
872
+ template<typename T, typename C, typename A>
954
873
  template<typename FwdSk>
955
- void kll_sketch<T, C, S, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
874
+ void kll_sketch<T, C, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
956
875
  worklevels[0] = 0;
957
876
 
958
877
  // the level zero data from "other" was already inserted into "this"
@@ -976,36 +895,36 @@ void kll_sketch<T, C, S, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uin
976
895
  }
977
896
  }
978
897
 
979
- template<typename T, typename C, typename S, typename A>
980
- void kll_sketch<T, C, S, A>::assert_correct_total_weight() const {
898
+ template<typename T, typename C, typename A>
899
+ void kll_sketch<T, C, A>::assert_correct_total_weight() const {
981
900
  const uint64_t total(kll_helper::sum_the_sample_weights(num_levels_, levels_.data()));
982
901
  if (total != n_) {
983
902
  throw std::logic_error("Total weight does not match N");
984
903
  }
985
904
  }
986
905
 
987
- template<typename T, typename C, typename S, typename A>
988
- uint32_t kll_sketch<T, C, S, A>::safe_level_size(uint8_t level) const {
906
+ template<typename T, typename C, typename A>
907
+ uint32_t kll_sketch<T, C, A>::safe_level_size(uint8_t level) const {
989
908
  if (level >= num_levels_) return 0;
990
909
  return levels_[level + 1] - levels_[level];
991
910
  }
992
911
 
993
- template<typename T, typename C, typename S, typename A>
994
- uint32_t kll_sketch<T, C, S, A>::get_num_retained_above_level_zero() const {
912
+ template<typename T, typename C, typename A>
913
+ uint32_t kll_sketch<T, C, A>::get_num_retained_above_level_zero() const {
995
914
  if (num_levels_ == 1) return 0;
996
915
  return levels_[num_levels_] - levels_[1];
997
916
  }
998
917
 
999
- template<typename T, typename C, typename S, typename A>
1000
- void kll_sketch<T, C, S, A>::check_m(uint8_t m) {
918
+ template<typename T, typename C, typename A>
919
+ void kll_sketch<T, C, A>::check_m(uint8_t m) {
1001
920
  if (m != DEFAULT_M) {
1002
921
  throw std::invalid_argument("Possible corruption: M must be " + std::to_string(DEFAULT_M)
1003
922
  + ": " + std::to_string(m));
1004
923
  }
1005
924
  }
1006
925
 
1007
- template<typename T, typename C, typename S, typename A>
1008
- void kll_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t flags_byte) {
926
+ template<typename T, typename C, typename A>
927
+ void kll_sketch<T, C, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t flags_byte) {
1009
928
  const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
1010
929
  const bool is_single_item(flags_byte & (1 << flags::IS_SINGLE_ITEM));
1011
930
  if (is_empty || is_single_item) {
@@ -1021,8 +940,8 @@ void kll_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t
1021
940
  }
1022
941
  }
1023
942
 
1024
- template<typename T, typename C, typename S, typename A>
1025
- void kll_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
943
+ template<typename T, typename C, typename A>
944
+ void kll_sketch<T, C, A>::check_serial_version(uint8_t serial_version) {
1026
945
  if (serial_version != SERIAL_VERSION_1 && serial_version != SERIAL_VERSION_2) {
1027
946
  throw std::invalid_argument("Possible corruption: serial version mismatch: expected "
1028
947
  + std::to_string(SERIAL_VERSION_1) + " or " + std::to_string(SERIAL_VERSION_2)
@@ -1030,16 +949,16 @@ void kll_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
1030
949
  }
1031
950
  }
1032
951
 
1033
- template<typename T, typename C, typename S, typename A>
1034
- void kll_sketch<T, C, S, A>::check_family_id(uint8_t family_id) {
952
+ template<typename T, typename C, typename A>
953
+ void kll_sketch<T, C, A>::check_family_id(uint8_t family_id) {
1035
954
  if (family_id != FAMILY) {
1036
955
  throw std::invalid_argument("Possible corruption: family mismatch: expected "
1037
956
  + std::to_string(FAMILY) + ", got " + std::to_string(family_id));
1038
957
  }
1039
958
  }
1040
959
 
1041
- template <typename T, typename C, typename S, typename A>
1042
- string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
960
+ template <typename T, typename C, typename A>
961
+ string<A> kll_sketch<T, C, A>::to_string(bool print_levels, bool print_items) const {
1043
962
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
1044
963
  // The stream does not support passing an allocator instance, and alternatives are complicated.
1045
964
  std::ostringstream os;
@@ -1057,8 +976,8 @@ string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
1057
976
  os << " Capacity items : " << items_size_ << std::endl;
1058
977
  os << " Retained items : " << get_num_retained() << std::endl;
1059
978
  if (!is_empty()) {
1060
- os << " Min value : " << *min_value_ << std::endl;
1061
- os << " Max value : " << *max_value_ << std::endl;
979
+ os << " Min item : " << *min_item_ << std::endl;
980
+ os << " Max item : " << *max_item_ << std::endl;
1062
981
  }
1063
982
  os << "### End sketch summary" << std::endl;
1064
983
 
@@ -1090,25 +1009,74 @@ string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
1090
1009
  return string<A>(os.str().c_str(), allocator_);
1091
1010
  }
1092
1011
 
1093
- template <typename T, typename C, typename S, typename A>
1094
- typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::begin() const {
1095
- return kll_sketch<T, C, S, A>::const_iterator(items_, levels_.data(), num_levels_);
1012
+ template <typename T, typename C, typename A>
1013
+ typename kll_sketch<T, C, A>::const_iterator kll_sketch<T, C, A>::begin() const {
1014
+ return kll_sketch<T, C, A>::const_iterator(items_, levels_.data(), num_levels_);
1015
+ }
1016
+
1017
+ template <typename T, typename C, typename A>
1018
+ typename kll_sketch<T, C, A>::const_iterator kll_sketch<T, C, A>::end() const {
1019
+ return kll_sketch<T, C, A>::const_iterator(nullptr, levels_.data(), num_levels_);
1020
+ }
1021
+
1022
+ template<typename T, typename C, typename A>
1023
+ class kll_sketch<T, C, A>::item_deleter {
1024
+ public:
1025
+ item_deleter(const A& allocator): allocator_(allocator) {}
1026
+ void operator() (T* ptr) {
1027
+ if (ptr != nullptr) {
1028
+ ptr->~T();
1029
+ allocator_.deallocate(ptr, 1);
1030
+ }
1031
+ }
1032
+ private:
1033
+ A allocator_;
1034
+ };
1035
+
1036
+ template<typename T, typename C, typename A>
1037
+ class kll_sketch<T, C, A>::items_deleter {
1038
+ public:
1039
+ items_deleter(uint32_t start, uint32_t num, const A& allocator):
1040
+ allocator_(allocator), start_(start), num_(num) {}
1041
+ void operator() (T* ptr) {
1042
+ if (ptr != nullptr) {
1043
+ for (uint32_t i = start_; i < num_; ++i) ptr[i].~T();
1044
+ allocator_.deallocate(ptr, num_);
1045
+ }
1046
+ }
1047
+ private:
1048
+ A allocator_;
1049
+ uint32_t start_;
1050
+ uint32_t num_;
1051
+ };
1052
+
1053
+ template<typename T, typename C, typename A>
1054
+ void kll_sketch<T, C, A>::setup_sorted_view() const {
1055
+ if (sorted_view_ == nullptr) {
1056
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
1057
+ sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
1058
+ }
1096
1059
  }
1097
1060
 
1098
- template <typename T, typename C, typename S, typename A>
1099
- typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::end() const {
1100
- return kll_sketch<T, C, S, A>::const_iterator(nullptr, levels_.data(), num_levels_);
1061
+ template<typename T, typename C, typename A>
1062
+ void kll_sketch<T, C, A>::reset_sorted_view() {
1063
+ if (sorted_view_ != nullptr) {
1064
+ sorted_view_->~quantiles_sorted_view();
1065
+ using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
1066
+ AllocSortedView(allocator_).deallocate(sorted_view_, 1);
1067
+ sorted_view_ = nullptr;
1068
+ }
1101
1069
  }
1102
1070
 
1103
1071
  // kll_sketch::const_iterator implementation
1104
1072
 
1105
- template<typename T, typename C, typename S, typename A>
1106
- kll_sketch<T, C, S, A>::const_iterator::const_iterator(const T* items, const uint32_t* levels, const uint8_t num_levels):
1073
+ template<typename T, typename C, typename A>
1074
+ kll_sketch<T, C, A>::const_iterator::const_iterator(const T* items, const uint32_t* levels, const uint8_t num_levels):
1107
1075
  items(items), levels(levels), num_levels(num_levels), index(items == nullptr ? levels[num_levels] : levels[0]), level(items == nullptr ? num_levels : 0), weight(1)
1108
1076
  {}
1109
1077
 
1110
- template<typename T, typename C, typename S, typename A>
1111
- typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_iterator::operator++() {
1078
+ template<typename T, typename C, typename A>
1079
+ typename kll_sketch<T, C, A>::const_iterator& kll_sketch<T, C, A>::const_iterator::operator++() {
1112
1080
  ++index;
1113
1081
  if (index == levels[level + 1]) { // go to the next non-empty level
1114
1082
  do {
@@ -1119,58 +1087,32 @@ typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_i
1119
1087
  return *this;
1120
1088
  }
1121
1089
 
1122
- template<typename T, typename C, typename S, typename A>
1123
- typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_iterator::operator++(int) {
1090
+ template<typename T, typename C, typename A>
1091
+ typename kll_sketch<T, C, A>::const_iterator& kll_sketch<T, C, A>::const_iterator::operator++(int) {
1124
1092
  const_iterator tmp(*this);
1125
1093
  operator++();
1126
1094
  return tmp;
1127
1095
  }
1128
1096
 
1129
- template<typename T, typename C, typename S, typename A>
1130
- bool kll_sketch<T, C, S, A>::const_iterator::operator==(const const_iterator& other) const {
1097
+ template<typename T, typename C, typename A>
1098
+ bool kll_sketch<T, C, A>::const_iterator::operator==(const const_iterator& other) const {
1131
1099
  return index == other.index;
1132
1100
  }
1133
1101
 
1134
- template<typename T, typename C, typename S, typename A>
1135
- bool kll_sketch<T, C, S, A>::const_iterator::operator!=(const const_iterator& other) const {
1102
+ template<typename T, typename C, typename A>
1103
+ bool kll_sketch<T, C, A>::const_iterator::operator!=(const const_iterator& other) const {
1136
1104
  return !operator==(other);
1137
1105
  }
1138
1106
 
1139
- template<typename T, typename C, typename S, typename A>
1140
- const std::pair<const T&, const uint64_t> kll_sketch<T, C, S, A>::const_iterator::operator*() const {
1141
- return std::pair<const T&, const uint64_t>(items[index], weight);
1107
+ template<typename T, typename C, typename A>
1108
+ auto kll_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
1109
+ return value_type(items[index], weight);
1142
1110
  }
1143
1111
 
1144
- template<typename T, typename C, typename S, typename A>
1145
- class kll_sketch<T, C, S, A>::item_deleter {
1146
- public:
1147
- item_deleter(const A& allocator): allocator_(allocator) {}
1148
- void operator() (T* ptr) {
1149
- if (ptr != nullptr) {
1150
- ptr->~T();
1151
- allocator_.deallocate(ptr, 1);
1152
- }
1153
- }
1154
- private:
1155
- A allocator_;
1156
- };
1157
-
1158
- template<typename T, typename C, typename S, typename A>
1159
- class kll_sketch<T, C, S, A>::items_deleter {
1160
- public:
1161
- items_deleter(uint32_t start, uint32_t num, const A& allocator):
1162
- allocator_(allocator), start_(start), num_(num) {}
1163
- void operator() (T* ptr) {
1164
- if (ptr != nullptr) {
1165
- for (uint32_t i = start_; i < num_; ++i) ptr[i].~T();
1166
- allocator_.deallocate(ptr, num_);
1167
- }
1168
- }
1169
- private:
1170
- A allocator_;
1171
- uint32_t start_;
1172
- uint32_t num_;
1173
- };
1112
+ template<typename T, typename C, typename A>
1113
+ auto kll_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
1114
+ return **this;
1115
+ }
1174
1116
 
1175
1117
  } /* namespace datasketches */
1176
1118