datasketches 0.2.7 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -26,24 +26,24 @@
|
|
26
26
|
#include <iomanip>
|
27
27
|
#include <sstream>
|
28
28
|
|
29
|
-
#include "common_defs.hpp"
|
30
29
|
#include "count_zeros.hpp"
|
31
30
|
#include "conditional_forward.hpp"
|
32
|
-
#include "quantiles_sketch.hpp"
|
33
31
|
|
34
32
|
namespace datasketches {
|
35
33
|
|
36
34
|
template<typename T, typename C, typename A>
|
37
|
-
quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, const A& allocator):
|
35
|
+
quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, const C& comparator, const A& allocator):
|
36
|
+
comparator_(comparator),
|
38
37
|
allocator_(allocator),
|
38
|
+
is_base_buffer_sorted_(true),
|
39
39
|
k_(k),
|
40
40
|
n_(0),
|
41
41
|
bit_pattern_(0),
|
42
42
|
base_buffer_(allocator_),
|
43
43
|
levels_(allocator_),
|
44
|
-
|
45
|
-
|
46
|
-
|
44
|
+
min_item_(nullptr),
|
45
|
+
max_item_(nullptr),
|
46
|
+
sorted_view_(nullptr)
|
47
47
|
{
|
48
48
|
check_k(k_);
|
49
49
|
base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k));
|
@@ -51,18 +51,20 @@ is_sorted_(true)
|
|
51
51
|
|
52
52
|
template<typename T, typename C, typename A>
|
53
53
|
quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch& other):
|
54
|
+
comparator_(other.comparator_),
|
54
55
|
allocator_(other.allocator_),
|
56
|
+
is_base_buffer_sorted_(other.is_base_buffer_sorted_),
|
55
57
|
k_(other.k_),
|
56
58
|
n_(other.n_),
|
57
59
|
bit_pattern_(other.bit_pattern_),
|
58
60
|
base_buffer_(other.base_buffer_),
|
59
61
|
levels_(other.levels_),
|
60
|
-
|
61
|
-
|
62
|
-
|
62
|
+
min_item_(nullptr),
|
63
|
+
max_item_(nullptr),
|
64
|
+
sorted_view_(nullptr)
|
63
65
|
{
|
64
|
-
if (other.
|
65
|
-
if (other.
|
66
|
+
if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
|
67
|
+
if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
|
66
68
|
for (size_t i = 0; i < levels_.size(); ++i) {
|
67
69
|
if (levels_[i].capacity() != other.levels_[i].capacity()) {
|
68
70
|
levels_[i].reserve(other.levels_[i].capacity());
|
@@ -72,63 +74,71 @@ is_sorted_(other.is_sorted_)
|
|
72
74
|
|
73
75
|
template<typename T, typename C, typename A>
|
74
76
|
quantiles_sketch<T, C, A>::quantiles_sketch(quantiles_sketch&& other) noexcept:
|
77
|
+
comparator_(other.comparator_),
|
75
78
|
allocator_(other.allocator_),
|
79
|
+
is_base_buffer_sorted_(other.is_base_buffer_sorted_),
|
76
80
|
k_(other.k_),
|
77
81
|
n_(other.n_),
|
78
82
|
bit_pattern_(other.bit_pattern_),
|
79
83
|
base_buffer_(std::move(other.base_buffer_)),
|
80
84
|
levels_(std::move(other.levels_)),
|
81
|
-
|
82
|
-
|
83
|
-
|
85
|
+
min_item_(other.min_item_),
|
86
|
+
max_item_(other.max_item_),
|
87
|
+
sorted_view_(nullptr)
|
84
88
|
{
|
85
|
-
other.
|
86
|
-
other.
|
89
|
+
other.min_item_ = nullptr;
|
90
|
+
other.max_item_ = nullptr;
|
87
91
|
}
|
88
92
|
|
89
93
|
template<typename T, typename C, typename A>
|
90
94
|
quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(const quantiles_sketch& other) {
|
91
95
|
quantiles_sketch<T, C, A> copy(other);
|
96
|
+
std::swap(comparator_, copy.comparator_);
|
92
97
|
std::swap(allocator_, copy.allocator_);
|
98
|
+
std::swap(is_base_buffer_sorted_, copy.is_base_buffer_sorted_);
|
93
99
|
std::swap(k_, copy.k_);
|
94
100
|
std::swap(n_, copy.n_);
|
95
101
|
std::swap(bit_pattern_, copy.bit_pattern_);
|
96
102
|
std::swap(base_buffer_, copy.base_buffer_);
|
97
103
|
std::swap(levels_, copy.levels_);
|
98
|
-
std::swap(
|
99
|
-
std::swap(
|
100
|
-
|
104
|
+
std::swap(min_item_, copy.min_item_);
|
105
|
+
std::swap(max_item_, copy.max_item_);
|
106
|
+
reset_sorted_view();
|
101
107
|
return *this;
|
102
108
|
}
|
103
109
|
|
104
110
|
template<typename T, typename C, typename A>
|
105
111
|
quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(quantiles_sketch&& other) noexcept {
|
112
|
+
std::swap(comparator_, other.comparator_);
|
106
113
|
std::swap(allocator_, other.allocator_);
|
114
|
+
std::swap(is_base_buffer_sorted_, other.is_base_buffer_sorted_);
|
107
115
|
std::swap(k_, other.k_);
|
108
116
|
std::swap(n_, other.n_);
|
109
117
|
std::swap(bit_pattern_, other.bit_pattern_);
|
110
118
|
std::swap(base_buffer_, other.base_buffer_);
|
111
119
|
std::swap(levels_, other.levels_);
|
112
|
-
std::swap(
|
113
|
-
std::swap(
|
114
|
-
|
120
|
+
std::swap(min_item_, other.min_item_);
|
121
|
+
std::swap(max_item_, other.max_item_);
|
122
|
+
reset_sorted_view();
|
115
123
|
return *this;
|
116
124
|
}
|
117
125
|
|
118
126
|
template<typename T, typename C, typename A>
|
119
127
|
quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
|
120
128
|
Level&& base_buffer, VectorLevels&& levels,
|
121
|
-
std::unique_ptr<T, item_deleter>
|
122
|
-
bool is_sorted, const A& allocator)
|
129
|
+
std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
|
130
|
+
bool is_sorted, const C& comparator, const A& allocator):
|
131
|
+
comparator_(comparator),
|
123
132
|
allocator_(allocator),
|
133
|
+
is_base_buffer_sorted_(is_sorted),
|
124
134
|
k_(k),
|
125
135
|
n_(n),
|
126
136
|
bit_pattern_(bit_pattern),
|
127
137
|
base_buffer_(std::move(base_buffer)),
|
128
138
|
levels_(std::move(levels)),
|
129
|
-
|
130
|
-
|
131
|
-
|
139
|
+
min_item_(min_item.release()),
|
140
|
+
max_item_(max_item.release()),
|
141
|
+
sorted_view_(nullptr)
|
132
142
|
{
|
133
143
|
uint32_t item_count = base_buffer_.size();
|
134
144
|
for (Level& lvl : levels_) {
|
@@ -140,16 +150,19 @@ is_sorted_(is_sorted)
|
|
140
150
|
|
141
151
|
template<typename T, typename C, typename A>
|
142
152
|
template<typename From, typename FC, typename FA>
|
143
|
-
quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other,
|
153
|
+
quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other,
|
154
|
+
const C& comparator, const A& allocator):
|
155
|
+
comparator_(comparator),
|
144
156
|
allocator_(allocator),
|
157
|
+
is_base_buffer_sorted_(false),
|
145
158
|
k_(other.get_k()),
|
146
159
|
n_(other.get_n()),
|
147
160
|
bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
|
148
161
|
base_buffer_(allocator),
|
149
162
|
levels_(allocator),
|
150
|
-
|
151
|
-
|
152
|
-
|
163
|
+
min_item_(nullptr),
|
164
|
+
max_item_(nullptr),
|
165
|
+
sorted_view_(nullptr)
|
153
166
|
{
|
154
167
|
static_assert(std::is_constructible<T, From>::value,
|
155
168
|
"Type converting constructor requires new type to be constructible from existing type");
|
@@ -157,8 +170,8 @@ is_sorted_(false)
|
|
157
170
|
base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
|
158
171
|
|
159
172
|
if (!other.is_empty()) {
|
160
|
-
|
161
|
-
|
173
|
+
min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
|
174
|
+
max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
|
162
175
|
|
163
176
|
// reserve space in levels
|
164
177
|
const uint8_t num_levels = compute_levels_needed(k_, n_);
|
@@ -189,7 +202,7 @@ is_sorted_(false)
|
|
189
202
|
// validate that ordering within each level is preserved
|
190
203
|
// base_buffer_ can be considered unsorted for this purpose
|
191
204
|
for (int i = 0; i < num_levels; ++i) {
|
192
|
-
if (!std::is_sorted(levels_[i].begin(), levels_[i].end(),
|
205
|
+
if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), comparator_)) {
|
193
206
|
throw std::logic_error("Copy construction across types produces invalid sorting");
|
194
207
|
}
|
195
208
|
}
|
@@ -199,40 +212,38 @@ is_sorted_(false)
|
|
199
212
|
|
200
213
|
template<typename T, typename C, typename A>
|
201
214
|
quantiles_sketch<T, C, A>::~quantiles_sketch() {
|
202
|
-
if (
|
203
|
-
|
204
|
-
allocator_.deallocate(
|
215
|
+
if (min_item_ != nullptr) {
|
216
|
+
min_item_->~T();
|
217
|
+
allocator_.deallocate(min_item_, 1);
|
205
218
|
}
|
206
|
-
if (
|
207
|
-
|
208
|
-
allocator_.deallocate(
|
219
|
+
if (max_item_ != nullptr) {
|
220
|
+
max_item_->~T();
|
221
|
+
allocator_.deallocate(max_item_, 1);
|
209
222
|
}
|
223
|
+
reset_sorted_view();
|
210
224
|
}
|
211
225
|
|
212
226
|
template<typename T, typename C, typename A>
|
213
227
|
template<typename FwdT>
|
214
228
|
void quantiles_sketch<T, C, A>::update(FwdT&& item) {
|
215
|
-
if (!
|
229
|
+
if (!check_update_item(item)) { return; }
|
216
230
|
if (is_empty()) {
|
217
|
-
|
218
|
-
|
231
|
+
min_item_ = new (allocator_.allocate(1)) T(item);
|
232
|
+
max_item_ = new (allocator_.allocate(1)) T(item);
|
219
233
|
} else {
|
220
|
-
if (
|
221
|
-
if (
|
234
|
+
if (comparator_(item, *min_item_)) *min_item_ = item;
|
235
|
+
if (comparator_(*max_item_, item)) *max_item_ = item;
|
222
236
|
}
|
223
237
|
|
224
238
|
// if exceed capacity, grow until size 2k -- assumes eager processing
|
225
|
-
if (base_buffer_.size() + 1 > base_buffer_.capacity())
|
226
|
-
grow_base_buffer();
|
239
|
+
if (base_buffer_.size() + 1 > base_buffer_.capacity()) grow_base_buffer();
|
227
240
|
|
228
241
|
base_buffer_.push_back(std::forward<FwdT>(item));
|
229
242
|
++n_;
|
230
243
|
|
231
|
-
if (base_buffer_.size() > 1)
|
232
|
-
|
233
|
-
|
234
|
-
if (base_buffer_.size() == 2 * k_)
|
235
|
-
process_full_base_buffer();
|
244
|
+
if (base_buffer_.size() > 1) is_base_buffer_sorted_ = false;
|
245
|
+
if (base_buffer_.size() == 2 * k_) process_full_base_buffer();
|
246
|
+
reset_sorted_view();
|
236
247
|
}
|
237
248
|
|
238
249
|
template<typename T, typename C, typename A>
|
@@ -245,10 +256,11 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
|
|
245
256
|
for (auto item : other.base_buffer_) {
|
246
257
|
update(conditional_forward<FwdSk>(item));
|
247
258
|
}
|
248
|
-
|
259
|
+
reset_sorted_view();
|
260
|
+
return;
|
249
261
|
}
|
250
262
|
|
251
|
-
//
|
263
|
+
// other has data and is in estimation mode
|
252
264
|
if (is_estimation_mode()) {
|
253
265
|
if (k_ == other.get_k()) {
|
254
266
|
standard_merge(*this, other);
|
@@ -273,6 +285,7 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
|
|
273
285
|
}
|
274
286
|
*this = sk_copy;
|
275
287
|
}
|
288
|
+
reset_sorted_view();
|
276
289
|
}
|
277
290
|
|
278
291
|
template<typename T, typename C, typename A>
|
@@ -286,8 +299,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
|
|
286
299
|
write(os, family);
|
287
300
|
|
288
301
|
// side-effect: sort base buffer since always compact
|
289
|
-
|
290
|
-
|
302
|
+
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
|
303
|
+
const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
|
291
304
|
|
292
305
|
// empty, ordered, compact are valid flags
|
293
306
|
const uint8_t flags_byte(
|
@@ -304,8 +317,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
|
|
304
317
|
write(os, n_);
|
305
318
|
|
306
319
|
// min and max
|
307
|
-
serde.serialize(os,
|
308
|
-
serde.serialize(os,
|
320
|
+
serde.serialize(os, min_item_, 1);
|
321
|
+
serde.serialize(os, max_item_, 1);
|
309
322
|
|
310
323
|
// base buffer items
|
311
324
|
serde.serialize(os, base_buffer_.data(), static_cast<unsigned>(base_buffer_.size()));
|
@@ -334,8 +347,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
|
|
334
347
|
ptr += copy_to_mem(family, ptr);
|
335
348
|
|
336
349
|
// side-effect: sort base buffer since always compact
|
337
|
-
|
338
|
-
|
350
|
+
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
|
351
|
+
const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
|
339
352
|
|
340
353
|
// empty, ordered, compact are valid flags
|
341
354
|
const uint8_t flags_byte(
|
@@ -352,8 +365,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
|
|
352
365
|
ptr += copy_to_mem(n_, ptr);
|
353
366
|
|
354
367
|
// min and max
|
355
|
-
ptr += serde.serialize(ptr, end_ptr - ptr,
|
356
|
-
ptr += serde.serialize(ptr, end_ptr - ptr,
|
368
|
+
ptr += serde.serialize(ptr, end_ptr - ptr, min_item_, 1);
|
369
|
+
ptr += serde.serialize(ptr, end_ptr - ptr, max_item_, 1);
|
357
370
|
|
358
371
|
// base buffer items
|
359
372
|
if (base_buffer_.size() > 0)
|
@@ -371,7 +384,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
|
|
371
384
|
|
372
385
|
template<typename T, typename C, typename A>
|
373
386
|
template<typename SerDe>
|
374
|
-
auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde,
|
387
|
+
auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde,
|
388
|
+
const C& comparator, const A &allocator) -> quantiles_sketch {
|
375
389
|
const auto preamble_longs = read<uint8_t>(is);
|
376
390
|
const auto serial_version = read<uint8_t>(is);
|
377
391
|
const auto family_id = read<uint8_t>(is);
|
@@ -387,7 +401,7 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
|
|
387
401
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
388
402
|
const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
|
389
403
|
if (is_empty) {
|
390
|
-
return quantiles_sketch(k, allocator);
|
404
|
+
return quantiles_sketch(k, comparator, allocator);
|
391
405
|
}
|
392
406
|
|
393
407
|
const auto items_seen = read<uint64_t>(is);
|
@@ -397,17 +411,17 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
|
|
397
411
|
|
398
412
|
A alloc(allocator);
|
399
413
|
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
400
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
401
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
402
|
-
std::unique_ptr<T, item_deleter>
|
403
|
-
std::unique_ptr<T, item_deleter>
|
414
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
415
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
416
|
+
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
417
|
+
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
404
418
|
|
405
|
-
serde.deserialize(is,
|
419
|
+
serde.deserialize(is, min_item_buffer.get(), 1);
|
406
420
|
// serde call did not throw, repackage with destructor
|
407
|
-
|
408
|
-
serde.deserialize(is,
|
421
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
422
|
+
serde.deserialize(is, max_item_buffer.get(), 1);
|
409
423
|
// serde call did not throw, repackage with destructor
|
410
|
-
|
424
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
411
425
|
|
412
426
|
if (serial_version == 1) {
|
413
427
|
read<uint64_t>(is); // no longer used
|
@@ -449,7 +463,8 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
|
|
449
463
|
}
|
450
464
|
|
451
465
|
return quantiles_sketch(k, items_seen, bit_pattern,
|
452
|
-
std::move(base_buffer), std::move(levels), std::move(
|
466
|
+
std::move(base_buffer), std::move(levels), std::move(min_item), std::move(max_item), is_sorted,
|
467
|
+
comparator, allocator);
|
453
468
|
}
|
454
469
|
|
455
470
|
template<typename T, typename C, typename A>
|
@@ -473,7 +488,8 @@ auto quantiles_sketch<T, C, A>::deserialize_array(std::istream& is, uint32_t num
|
|
473
488
|
|
474
489
|
template<typename T, typename C, typename A>
|
475
490
|
template<typename SerDe>
|
476
|
-
auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& serde,
|
491
|
+
auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& serde,
|
492
|
+
const C& comparator, const A &allocator) -> quantiles_sketch {
|
477
493
|
ensure_minimum_memory(size, 8);
|
478
494
|
const char* ptr = static_cast<const char*>(bytes);
|
479
495
|
const char* end_ptr = static_cast<const char*>(bytes) + size;
|
@@ -498,7 +514,7 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
|
|
498
514
|
|
499
515
|
const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
|
500
516
|
if (is_empty) {
|
501
|
-
return quantiles_sketch(k, allocator);
|
517
|
+
return quantiles_sketch(k, comparator, allocator);
|
502
518
|
}
|
503
519
|
|
504
520
|
ensure_minimum_memory(size, 16);
|
@@ -510,17 +526,17 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
|
|
510
526
|
|
511
527
|
A alloc(allocator);
|
512
528
|
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
513
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
514
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
515
|
-
std::unique_ptr<T, item_deleter>
|
516
|
-
std::unique_ptr<T, item_deleter>
|
529
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
530
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
531
|
+
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
532
|
+
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
517
533
|
|
518
|
-
ptr += serde.deserialize(ptr, end_ptr - ptr,
|
534
|
+
ptr += serde.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
|
519
535
|
// serde call did not throw, repackage with destructor
|
520
|
-
|
521
|
-
ptr += serde.deserialize(ptr, end_ptr - ptr,
|
536
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
537
|
+
ptr += serde.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
|
522
538
|
// serde call did not throw, repackage with destructor
|
523
|
-
|
539
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
524
540
|
|
525
541
|
if (serial_version == 1) {
|
526
542
|
uint64_t unused_long;
|
@@ -567,7 +583,8 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
|
|
567
583
|
}
|
568
584
|
|
569
585
|
return quantiles_sketch(k, items_seen, bit_pattern,
|
570
|
-
std::move(base_buffer_pair.first), std::move(levels), std::move(
|
586
|
+
std::move(base_buffer_pair.first), std::move(levels), std::move(min_item), std::move(max_item), is_sorted,
|
587
|
+
comparator, allocator);
|
571
588
|
}
|
572
589
|
|
573
590
|
template<typename T, typename C, typename A>
|
@@ -605,11 +622,11 @@ string<A> quantiles_sketch<T, C, A>::to_string(bool print_levels, bool print_ite
|
|
605
622
|
os << " Empty : " << (is_empty() ? "true" : "false") << std::endl;
|
606
623
|
os << " Estimation mode: " << (is_estimation_mode() ? "true" : "false") << std::endl;
|
607
624
|
os << " Levels (w/o BB): " << levels_.size() << std::endl;
|
608
|
-
os << " Used Levels : " <<
|
625
|
+
os << " Used Levels : " << count_valid_levels(bit_pattern_) << std::endl;
|
609
626
|
os << " Retained items : " << get_num_retained() << std::endl;
|
610
627
|
if (!is_empty()) {
|
611
|
-
os << " Min
|
612
|
-
os << " Max
|
628
|
+
os << " Min item : " << *min_item_ << std::endl;
|
629
|
+
os << " Max item : " << *max_item_ << std::endl;
|
613
630
|
}
|
614
631
|
os << "### End sketch summary" << std::endl;
|
615
632
|
|
@@ -667,20 +684,20 @@ uint32_t quantiles_sketch<T, C, A>::get_num_retained() const {
|
|
667
684
|
}
|
668
685
|
|
669
686
|
template<typename T, typename C, typename A>
|
670
|
-
const T& quantiles_sketch<T, C, A>::
|
671
|
-
if (is_empty())
|
672
|
-
return *
|
687
|
+
const T& quantiles_sketch<T, C, A>::get_min_item() const {
|
688
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
689
|
+
return *min_item_;
|
673
690
|
}
|
674
691
|
|
675
692
|
template<typename T, typename C, typename A>
|
676
|
-
const T& quantiles_sketch<T, C, A>::
|
677
|
-
if (is_empty())
|
678
|
-
return *
|
693
|
+
const T& quantiles_sketch<T, C, A>::get_max_item() const {
|
694
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
695
|
+
return *max_item_;
|
679
696
|
}
|
680
697
|
|
681
698
|
template<typename T, typename C, typename A>
|
682
699
|
C quantiles_sketch<T, C, A>::get_comparator() const {
|
683
|
-
return
|
700
|
+
return comparator_;
|
684
701
|
}
|
685
702
|
|
686
703
|
template<typename T, typename C, typename A>
|
@@ -702,8 +719,8 @@ template<typename SerDe, typename TT, typename std::enable_if<!std::is_arithmeti
|
|
702
719
|
size_t quantiles_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& serde) const {
|
703
720
|
if (is_empty()) { return EMPTY_SIZE_BYTES; }
|
704
721
|
size_t size = DATA_START;
|
705
|
-
size += serde.size_of_item(*
|
706
|
-
size += serde.size_of_item(*
|
722
|
+
size += serde.size_of_item(*min_item_);
|
723
|
+
size += serde.size_of_item(*max_item_);
|
707
724
|
for (auto it: *this) size += serde.size_of_item(it.first);
|
708
725
|
return size;
|
709
726
|
}
|
@@ -721,162 +738,121 @@ double quantiles_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool is_
|
|
721
738
|
}
|
722
739
|
|
723
740
|
template<typename T, typename C, typename A>
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
|
741
|
+
quantiles_sorted_view<T, C, A> quantiles_sketch<T, C, A>::get_sorted_view() const {
|
742
|
+
// allow side-effect of sorting the base buffer
|
743
|
+
if (!is_base_buffer_sorted_) {
|
744
|
+
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
|
745
|
+
const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
|
730
746
|
}
|
731
|
-
|
747
|
+
quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
|
732
748
|
|
733
749
|
uint64_t weight = 1;
|
734
750
|
view.add(base_buffer_.begin(), base_buffer_.end(), weight);
|
735
|
-
for (auto& level
|
751
|
+
for (const auto& level: levels_) {
|
736
752
|
weight <<= 1;
|
737
753
|
if (level.empty()) { continue; }
|
738
754
|
view.add(level.begin(), level.end(), weight);
|
739
755
|
}
|
740
756
|
|
741
|
-
|
757
|
+
view.convert_to_cummulative();
|
742
758
|
return view;
|
743
759
|
}
|
744
760
|
|
745
761
|
template<typename T, typename C, typename A>
|
746
|
-
|
747
|
-
|
748
|
-
if (is_empty()) return get_invalid_value();
|
749
|
-
if (rank == 0.0) return *min_value_;
|
750
|
-
if (rank == 1.0) return *max_value_;
|
762
|
+
auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
|
763
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
751
764
|
if ((rank < 0.0) || (rank > 1.0)) {
|
752
|
-
throw std::invalid_argument("
|
765
|
+
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
|
753
766
|
}
|
754
767
|
// possible side-effect: sorting base buffer
|
755
|
-
|
768
|
+
setup_sorted_view();
|
769
|
+
return sorted_view_->get_quantile(rank, inclusive);
|
756
770
|
}
|
757
771
|
|
758
772
|
template<typename T, typename C, typename A>
|
759
|
-
|
760
|
-
|
773
|
+
std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
|
774
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
761
775
|
std::vector<T, A> quantiles(allocator_);
|
762
|
-
if (is_empty()) return quantiles;
|
763
776
|
quantiles.reserve(size);
|
764
777
|
|
765
778
|
// possible side-effect: sorting base buffer
|
766
|
-
|
779
|
+
setup_sorted_view();
|
767
780
|
|
768
781
|
for (uint32_t i = 0; i < size; ++i) {
|
769
782
|
const double rank = ranks[i];
|
770
783
|
if ((rank < 0.0) || (rank > 1.0)) {
|
771
|
-
throw std::invalid_argument("rank cannot be less than
|
772
|
-
}
|
773
|
-
if (rank == 0.0) quantiles.push_back(*min_value_);
|
774
|
-
else if (rank == 1.0) quantiles.push_back(*max_value_);
|
775
|
-
else {
|
776
|
-
quantiles.push_back(view.get_quantile(rank));
|
784
|
+
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
|
777
785
|
}
|
786
|
+
quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
|
778
787
|
}
|
779
788
|
return quantiles;
|
780
789
|
}
|
781
790
|
|
782
791
|
template<typename T, typename C, typename A>
|
783
|
-
|
784
|
-
std::
|
785
|
-
if (is_empty()) return std::vector<T, A>(allocator_);
|
792
|
+
std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
|
793
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
786
794
|
if (num == 0) {
|
787
795
|
throw std::invalid_argument("num must be > 0");
|
788
796
|
}
|
789
|
-
vector_double
|
790
|
-
|
797
|
+
vector_double ranks(num, 0, allocator_);
|
798
|
+
ranks[0] = 0.0;
|
791
799
|
for (size_t i = 1; i < num; i++) {
|
792
|
-
|
800
|
+
ranks[i] = static_cast<double>(i) / (num - 1);
|
793
801
|
}
|
794
802
|
if (num > 1) {
|
795
|
-
|
803
|
+
ranks[num - 1] = 1.0;
|
796
804
|
}
|
797
|
-
return get_quantiles
|
805
|
+
return get_quantiles(ranks.data(), num, inclusive);
|
798
806
|
}
|
799
807
|
|
800
808
|
template<typename T, typename C, typename A>
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
uint64_t total = 0;
|
806
|
-
for (const T &item: base_buffer_) {
|
807
|
-
if (inclusive ? !C()(value, item) : C()(item, value))
|
808
|
-
total += weight;
|
809
|
-
}
|
810
|
-
|
811
|
-
weight *= 2;
|
812
|
-
for (uint8_t level = 0; level < levels_.size(); ++level, weight *= 2) {
|
813
|
-
if (levels_[level].empty()) { continue; }
|
814
|
-
const T* data = levels_[level].data();
|
815
|
-
for (uint16_t i = 0; i < k_; ++i) {
|
816
|
-
if (inclusive ? !C()(value, data[i]) : C()(data[i], value))
|
817
|
-
total += weight;
|
818
|
-
else
|
819
|
-
break; // levels are sorted, no point comparing further
|
820
|
-
}
|
821
|
-
}
|
822
|
-
return (double) total / n_;
|
809
|
+
double quantiles_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
|
810
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
811
|
+
setup_sorted_view();
|
812
|
+
return sorted_view_->get_rank(item, inclusive);
|
823
813
|
}
|
824
814
|
|
825
815
|
template<typename T, typename C, typename A>
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
for (uint32_t i = size; i > 0; --i) {
|
831
|
-
buckets[i] -= buckets[i - 1];
|
832
|
-
}
|
833
|
-
return buckets;
|
816
|
+
auto quantiles_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
|
817
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
818
|
+
setup_sorted_view();
|
819
|
+
return sorted_view_->get_PMF(split_points, size, inclusive);
|
834
820
|
}
|
835
821
|
|
836
822
|
template<typename T, typename C, typename A>
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
check_split_points(split_points, size);
|
842
|
-
buckets.reserve(size + 1);
|
843
|
-
for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank<inclusive>(split_points[i]));
|
844
|
-
buckets.push_back(1);
|
845
|
-
return buckets;
|
823
|
+
auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
|
824
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
825
|
+
setup_sorted_view();
|
826
|
+
return sorted_view_->get_CDF(split_points, size, inclusive);
|
846
827
|
}
|
847
828
|
|
848
829
|
template<typename T, typename C, typename A>
|
849
|
-
uint32_t quantiles_sketch<T, C, A>::compute_retained_items(
|
830
|
+
uint32_t quantiles_sketch<T, C, A>::compute_retained_items(uint16_t k, uint64_t n) {
|
850
831
|
const uint32_t bb_count = compute_base_buffer_items(k, n);
|
851
832
|
const uint64_t bit_pattern = compute_bit_pattern(k, n);
|
852
|
-
const uint32_t valid_levels =
|
833
|
+
const uint32_t valid_levels = count_valid_levels(bit_pattern);
|
853
834
|
return bb_count + (k * valid_levels);
|
854
835
|
}
|
855
836
|
|
856
837
|
template<typename T, typename C, typename A>
|
857
|
-
uint32_t quantiles_sketch<T, C, A>::compute_base_buffer_items(
|
838
|
+
uint32_t quantiles_sketch<T, C, A>::compute_base_buffer_items(uint16_t k, uint64_t n) {
|
858
839
|
return n % (static_cast<uint64_t>(2) * k);
|
859
840
|
}
|
860
841
|
|
861
842
|
template<typename T, typename C, typename A>
|
862
|
-
uint64_t quantiles_sketch<T, C, A>::compute_bit_pattern(
|
843
|
+
uint64_t quantiles_sketch<T, C, A>::compute_bit_pattern(uint16_t k, uint64_t n) {
|
863
844
|
return n / (static_cast<uint64_t>(2) * k);
|
864
845
|
}
|
865
846
|
|
866
847
|
template<typename T, typename C, typename A>
|
867
|
-
uint32_t quantiles_sketch<T, C, A>::
|
868
|
-
// TODO: Java's Long.bitCount() probably uses a better method
|
869
|
-
uint64_t bp = bit_pattern;
|
848
|
+
uint32_t quantiles_sketch<T, C, A>::count_valid_levels(uint64_t bit_pattern) {
|
870
849
|
uint32_t count = 0;
|
871
|
-
|
872
|
-
if ((bp & 0x01) == 1) ++count;
|
873
|
-
bp >>= 1;
|
874
|
-
}
|
850
|
+
for (; bit_pattern > 0; ++count) bit_pattern &= bit_pattern - 1;
|
875
851
|
return count;
|
876
852
|
}
|
877
853
|
|
878
854
|
template<typename T, typename C, typename A>
|
879
|
-
uint8_t quantiles_sketch<T, C, A>::compute_levels_needed(
|
855
|
+
uint8_t quantiles_sketch<T, C, A>::compute_levels_needed(uint16_t k, uint64_t n) {
|
880
856
|
return static_cast<uint8_t>(64U) - count_leading_zeros_in_u64(n / (2 * k));
|
881
857
|
}
|
882
858
|
|
@@ -961,13 +937,13 @@ void quantiles_sketch<T, C, A>::process_full_base_buffer() {
|
|
961
937
|
// make sure there will be enough levels for the propagation
|
962
938
|
grow_levels_if_needed(); // note: n_ was already incremented by update() before this
|
963
939
|
|
964
|
-
std::sort(base_buffer_.begin(), base_buffer_.end(),
|
940
|
+
std::sort(base_buffer_.begin(), base_buffer_.end(), comparator_);
|
965
941
|
in_place_propagate_carry(0,
|
966
942
|
levels_[0], // unused here, but 0 is guaranteed to exist
|
967
943
|
base_buffer_,
|
968
944
|
true, *this);
|
969
945
|
base_buffer_.clear();
|
970
|
-
|
946
|
+
is_base_buffer_sorted_ = true;
|
971
947
|
if (n_ / (2 * k_) != bit_pattern_) {
|
972
948
|
throw std::logic_error("Internal error: n / 2k (" + std::to_string(n_ / 2 * k_)
|
973
949
|
+ " != bit_pattern " + std::to_string(bit_pattern_));
|
@@ -1019,7 +995,7 @@ void quantiles_sketch<T, C, A>::in_place_propagate_carry(uint8_t starting_level,
|
|
1019
995
|
merge_two_size_k_buffers(
|
1020
996
|
sketch.levels_[lvl],
|
1021
997
|
sketch.levels_[ending_level],
|
1022
|
-
buf_size_2k);
|
998
|
+
buf_size_2k, sketch.get_comparator());
|
1023
999
|
sketch.levels_[lvl].clear();
|
1024
1000
|
sketch.levels_[ending_level].clear();
|
1025
1001
|
zip_buffer(buf_size_2k, sketch.levels_[ending_level]);
|
@@ -1071,9 +1047,9 @@ void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf
|
|
1071
1047
|
// do not clear input buffer
|
1072
1048
|
}
|
1073
1049
|
|
1074
|
-
|
1075
1050
|
template<typename T, typename C, typename A>
|
1076
|
-
void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& src_2,
|
1051
|
+
void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& src_2,
|
1052
|
+
Level& dst, const C& comparator) {
|
1077
1053
|
if (src_1.size() != src_2.size()
|
1078
1054
|
|| src_1.size() * 2 != dst.capacity()
|
1079
1055
|
|| dst.size() != 0) {
|
@@ -1085,7 +1061,7 @@ void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& sr
|
|
1085
1061
|
|
1086
1062
|
// TODO: probably actually doing copies given Level&?
|
1087
1063
|
while (it1 != end1 && it2 != end2) {
|
1088
|
-
if (
|
1064
|
+
if (comparator(*it1, *it2)) {
|
1089
1065
|
dst.push_back(std::move(*it1++));
|
1090
1066
|
} else {
|
1091
1067
|
dst.push_back(std::move(*it2++));
|
@@ -1100,7 +1076,6 @@ void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& sr
|
|
1100
1076
|
}
|
1101
1077
|
}
|
1102
1078
|
|
1103
|
-
|
1104
1079
|
template<typename T, typename C, typename A>
|
1105
1080
|
template<typename FwdSk>
|
1106
1081
|
void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& src) {
|
@@ -1149,25 +1124,24 @@ void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& sr
|
|
1149
1124
|
throw std::logic_error("Failed internal consistency check after standard_merge()");
|
1150
1125
|
}
|
1151
1126
|
|
1152
|
-
// update min and max
|
1127
|
+
// update min and max items
|
1153
1128
|
// can't just check is_empty() since min and max might not have been set if
|
1154
1129
|
// there were no base buffer items added via update()
|
1155
|
-
if (tgt.
|
1156
|
-
tgt.
|
1130
|
+
if (tgt.min_item_ == nullptr) {
|
1131
|
+
tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
|
1157
1132
|
} else {
|
1158
|
-
if (
|
1159
|
-
*tgt.
|
1133
|
+
if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
|
1134
|
+
*tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
|
1160
1135
|
}
|
1161
1136
|
|
1162
|
-
if (tgt.
|
1163
|
-
tgt.
|
1137
|
+
if (tgt.max_item_ == nullptr) {
|
1138
|
+
tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
|
1164
1139
|
} else {
|
1165
|
-
if (
|
1166
|
-
*tgt.
|
1140
|
+
if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
|
1141
|
+
*tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
|
1167
1142
|
}
|
1168
1143
|
}
|
1169
1144
|
|
1170
|
-
|
1171
1145
|
template<typename T, typename C, typename A>
|
1172
1146
|
template<typename FwdSk>
|
1173
1147
|
void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&& src) {
|
@@ -1226,25 +1200,24 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
|
|
1226
1200
|
throw std::logic_error("Failed internal consistency check after downsampling_merge()");
|
1227
1201
|
}
|
1228
1202
|
|
1229
|
-
// update min and max
|
1203
|
+
// update min and max items
|
1230
1204
|
// can't just check is_empty() since min and max might not have been set if
|
1231
1205
|
// there were no base buffer items added via update()
|
1232
|
-
if (tgt.
|
1233
|
-
tgt.
|
1206
|
+
if (tgt.min_item_ == nullptr) {
|
1207
|
+
tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
|
1234
1208
|
} else {
|
1235
|
-
if (
|
1236
|
-
*tgt.
|
1209
|
+
if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
|
1210
|
+
*tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
|
1237
1211
|
}
|
1238
1212
|
|
1239
|
-
if (tgt.
|
1240
|
-
tgt.
|
1213
|
+
if (tgt.max_item_ == nullptr) {
|
1214
|
+
tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
|
1241
1215
|
} else {
|
1242
|
-
if (
|
1243
|
-
*tgt.
|
1216
|
+
if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
|
1217
|
+
*tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
|
1244
1218
|
}
|
1245
1219
|
}
|
1246
1220
|
|
1247
|
-
|
1248
1221
|
template<typename T, typename C, typename A>
|
1249
1222
|
uint8_t quantiles_sketch<T, C, A>::lowest_zero_bit_starting_at(uint64_t bits, uint8_t starting_bit) {
|
1250
1223
|
uint8_t pos = starting_bit & 0X3F;
|
@@ -1292,6 +1265,23 @@ class quantiles_sketch<T, C, A>::items_deleter {
|
|
1292
1265
|
size_t num_;
|
1293
1266
|
};
|
1294
1267
|
|
1268
|
+
template<typename T, typename C, typename A>
|
1269
|
+
void quantiles_sketch<T, C, A>::setup_sorted_view() const {
|
1270
|
+
if (sorted_view_ == nullptr) {
|
1271
|
+
using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
|
1272
|
+
sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
|
1273
|
+
}
|
1274
|
+
}
|
1275
|
+
|
1276
|
+
template<typename T, typename C, typename A>
|
1277
|
+
void quantiles_sketch<T, C, A>::reset_sorted_view() {
|
1278
|
+
if (sorted_view_ != nullptr) {
|
1279
|
+
sorted_view_->~quantiles_sorted_view();
|
1280
|
+
using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
|
1281
|
+
AllocSortedView(allocator_).deallocate(sorted_view_, 1);
|
1282
|
+
sorted_view_ = nullptr;
|
1283
|
+
}
|
1284
|
+
}
|
1295
1285
|
|
1296
1286
|
// quantiles_sketch::const_iterator implementation
|
1297
1287
|
|
@@ -1364,8 +1354,13 @@ bool quantiles_sketch<T, C, A>::const_iterator::operator!=(const const_iterator&
|
|
1364
1354
|
}
|
1365
1355
|
|
1366
1356
|
template<typename T, typename C, typename A>
|
1367
|
-
|
1368
|
-
return
|
1357
|
+
auto quantiles_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
|
1358
|
+
return value_type(level_ == -1 ? base_buffer_[index_] : levels_[level_][index_], weight_);
|
1359
|
+
}
|
1360
|
+
|
1361
|
+
template<typename T, typename C, typename A>
|
1362
|
+
auto quantiles_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
|
1363
|
+
return **this;
|
1369
1364
|
}
|
1370
1365
|
|
1371
1366
|
} /* namespace datasketches */
|