datasketches 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
|
@@ -26,24 +26,24 @@
|
|
|
26
26
|
#include <iomanip>
|
|
27
27
|
#include <sstream>
|
|
28
28
|
|
|
29
|
-
#include "common_defs.hpp"
|
|
30
29
|
#include "count_zeros.hpp"
|
|
31
30
|
#include "conditional_forward.hpp"
|
|
32
|
-
#include "quantiles_sketch.hpp"
|
|
33
31
|
|
|
34
32
|
namespace datasketches {
|
|
35
33
|
|
|
36
34
|
template<typename T, typename C, typename A>
|
|
37
|
-
quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, const A& allocator):
|
|
35
|
+
quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, const C& comparator, const A& allocator):
|
|
36
|
+
comparator_(comparator),
|
|
38
37
|
allocator_(allocator),
|
|
38
|
+
is_base_buffer_sorted_(true),
|
|
39
39
|
k_(k),
|
|
40
40
|
n_(0),
|
|
41
41
|
bit_pattern_(0),
|
|
42
42
|
base_buffer_(allocator_),
|
|
43
43
|
levels_(allocator_),
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
44
|
+
min_item_(nullptr),
|
|
45
|
+
max_item_(nullptr),
|
|
46
|
+
sorted_view_(nullptr)
|
|
47
47
|
{
|
|
48
48
|
check_k(k_);
|
|
49
49
|
base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k));
|
|
@@ -51,18 +51,20 @@ is_sorted_(true)
|
|
|
51
51
|
|
|
52
52
|
template<typename T, typename C, typename A>
|
|
53
53
|
quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch& other):
|
|
54
|
+
comparator_(other.comparator_),
|
|
54
55
|
allocator_(other.allocator_),
|
|
56
|
+
is_base_buffer_sorted_(other.is_base_buffer_sorted_),
|
|
55
57
|
k_(other.k_),
|
|
56
58
|
n_(other.n_),
|
|
57
59
|
bit_pattern_(other.bit_pattern_),
|
|
58
60
|
base_buffer_(other.base_buffer_),
|
|
59
61
|
levels_(other.levels_),
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
62
|
+
min_item_(nullptr),
|
|
63
|
+
max_item_(nullptr),
|
|
64
|
+
sorted_view_(nullptr)
|
|
63
65
|
{
|
|
64
|
-
if (other.
|
|
65
|
-
if (other.
|
|
66
|
+
if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
|
|
67
|
+
if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
|
|
66
68
|
for (size_t i = 0; i < levels_.size(); ++i) {
|
|
67
69
|
if (levels_[i].capacity() != other.levels_[i].capacity()) {
|
|
68
70
|
levels_[i].reserve(other.levels_[i].capacity());
|
|
@@ -72,63 +74,71 @@ is_sorted_(other.is_sorted_)
|
|
|
72
74
|
|
|
73
75
|
template<typename T, typename C, typename A>
|
|
74
76
|
quantiles_sketch<T, C, A>::quantiles_sketch(quantiles_sketch&& other) noexcept:
|
|
77
|
+
comparator_(other.comparator_),
|
|
75
78
|
allocator_(other.allocator_),
|
|
79
|
+
is_base_buffer_sorted_(other.is_base_buffer_sorted_),
|
|
76
80
|
k_(other.k_),
|
|
77
81
|
n_(other.n_),
|
|
78
82
|
bit_pattern_(other.bit_pattern_),
|
|
79
83
|
base_buffer_(std::move(other.base_buffer_)),
|
|
80
84
|
levels_(std::move(other.levels_)),
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
85
|
+
min_item_(other.min_item_),
|
|
86
|
+
max_item_(other.max_item_),
|
|
87
|
+
sorted_view_(nullptr)
|
|
84
88
|
{
|
|
85
|
-
other.
|
|
86
|
-
other.
|
|
89
|
+
other.min_item_ = nullptr;
|
|
90
|
+
other.max_item_ = nullptr;
|
|
87
91
|
}
|
|
88
92
|
|
|
89
93
|
template<typename T, typename C, typename A>
|
|
90
94
|
quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(const quantiles_sketch& other) {
|
|
91
95
|
quantiles_sketch<T, C, A> copy(other);
|
|
96
|
+
std::swap(comparator_, copy.comparator_);
|
|
92
97
|
std::swap(allocator_, copy.allocator_);
|
|
98
|
+
std::swap(is_base_buffer_sorted_, copy.is_base_buffer_sorted_);
|
|
93
99
|
std::swap(k_, copy.k_);
|
|
94
100
|
std::swap(n_, copy.n_);
|
|
95
101
|
std::swap(bit_pattern_, copy.bit_pattern_);
|
|
96
102
|
std::swap(base_buffer_, copy.base_buffer_);
|
|
97
103
|
std::swap(levels_, copy.levels_);
|
|
98
|
-
std::swap(
|
|
99
|
-
std::swap(
|
|
100
|
-
|
|
104
|
+
std::swap(min_item_, copy.min_item_);
|
|
105
|
+
std::swap(max_item_, copy.max_item_);
|
|
106
|
+
reset_sorted_view();
|
|
101
107
|
return *this;
|
|
102
108
|
}
|
|
103
109
|
|
|
104
110
|
template<typename T, typename C, typename A>
|
|
105
111
|
quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(quantiles_sketch&& other) noexcept {
|
|
112
|
+
std::swap(comparator_, other.comparator_);
|
|
106
113
|
std::swap(allocator_, other.allocator_);
|
|
114
|
+
std::swap(is_base_buffer_sorted_, other.is_base_buffer_sorted_);
|
|
107
115
|
std::swap(k_, other.k_);
|
|
108
116
|
std::swap(n_, other.n_);
|
|
109
117
|
std::swap(bit_pattern_, other.bit_pattern_);
|
|
110
118
|
std::swap(base_buffer_, other.base_buffer_);
|
|
111
119
|
std::swap(levels_, other.levels_);
|
|
112
|
-
std::swap(
|
|
113
|
-
std::swap(
|
|
114
|
-
|
|
120
|
+
std::swap(min_item_, other.min_item_);
|
|
121
|
+
std::swap(max_item_, other.max_item_);
|
|
122
|
+
reset_sorted_view();
|
|
115
123
|
return *this;
|
|
116
124
|
}
|
|
117
125
|
|
|
118
126
|
template<typename T, typename C, typename A>
|
|
119
127
|
quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
|
|
120
128
|
Level&& base_buffer, VectorLevels&& levels,
|
|
121
|
-
std::unique_ptr<T, item_deleter>
|
|
122
|
-
bool is_sorted, const A& allocator)
|
|
129
|
+
std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
|
|
130
|
+
bool is_sorted, const C& comparator, const A& allocator):
|
|
131
|
+
comparator_(comparator),
|
|
123
132
|
allocator_(allocator),
|
|
133
|
+
is_base_buffer_sorted_(is_sorted),
|
|
124
134
|
k_(k),
|
|
125
135
|
n_(n),
|
|
126
136
|
bit_pattern_(bit_pattern),
|
|
127
137
|
base_buffer_(std::move(base_buffer)),
|
|
128
138
|
levels_(std::move(levels)),
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
139
|
+
min_item_(min_item.release()),
|
|
140
|
+
max_item_(max_item.release()),
|
|
141
|
+
sorted_view_(nullptr)
|
|
132
142
|
{
|
|
133
143
|
uint32_t item_count = base_buffer_.size();
|
|
134
144
|
for (Level& lvl : levels_) {
|
|
@@ -140,16 +150,19 @@ is_sorted_(is_sorted)
|
|
|
140
150
|
|
|
141
151
|
template<typename T, typename C, typename A>
|
|
142
152
|
template<typename From, typename FC, typename FA>
|
|
143
|
-
quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other,
|
|
153
|
+
quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other,
|
|
154
|
+
const C& comparator, const A& allocator):
|
|
155
|
+
comparator_(comparator),
|
|
144
156
|
allocator_(allocator),
|
|
157
|
+
is_base_buffer_sorted_(false),
|
|
145
158
|
k_(other.get_k()),
|
|
146
159
|
n_(other.get_n()),
|
|
147
160
|
bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
|
|
148
161
|
base_buffer_(allocator),
|
|
149
162
|
levels_(allocator),
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
163
|
+
min_item_(nullptr),
|
|
164
|
+
max_item_(nullptr),
|
|
165
|
+
sorted_view_(nullptr)
|
|
153
166
|
{
|
|
154
167
|
static_assert(std::is_constructible<T, From>::value,
|
|
155
168
|
"Type converting constructor requires new type to be constructible from existing type");
|
|
@@ -157,8 +170,8 @@ is_sorted_(false)
|
|
|
157
170
|
base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
|
|
158
171
|
|
|
159
172
|
if (!other.is_empty()) {
|
|
160
|
-
|
|
161
|
-
|
|
173
|
+
min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
|
|
174
|
+
max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
|
|
162
175
|
|
|
163
176
|
// reserve space in levels
|
|
164
177
|
const uint8_t num_levels = compute_levels_needed(k_, n_);
|
|
@@ -189,7 +202,7 @@ is_sorted_(false)
|
|
|
189
202
|
// validate that ordering within each level is preserved
|
|
190
203
|
// base_buffer_ can be considered unsorted for this purpose
|
|
191
204
|
for (int i = 0; i < num_levels; ++i) {
|
|
192
|
-
if (!std::is_sorted(levels_[i].begin(), levels_[i].end(),
|
|
205
|
+
if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), comparator_)) {
|
|
193
206
|
throw std::logic_error("Copy construction across types produces invalid sorting");
|
|
194
207
|
}
|
|
195
208
|
}
|
|
@@ -199,40 +212,38 @@ is_sorted_(false)
|
|
|
199
212
|
|
|
200
213
|
template<typename T, typename C, typename A>
|
|
201
214
|
quantiles_sketch<T, C, A>::~quantiles_sketch() {
|
|
202
|
-
if (
|
|
203
|
-
|
|
204
|
-
allocator_.deallocate(
|
|
215
|
+
if (min_item_ != nullptr) {
|
|
216
|
+
min_item_->~T();
|
|
217
|
+
allocator_.deallocate(min_item_, 1);
|
|
205
218
|
}
|
|
206
|
-
if (
|
|
207
|
-
|
|
208
|
-
allocator_.deallocate(
|
|
219
|
+
if (max_item_ != nullptr) {
|
|
220
|
+
max_item_->~T();
|
|
221
|
+
allocator_.deallocate(max_item_, 1);
|
|
209
222
|
}
|
|
223
|
+
reset_sorted_view();
|
|
210
224
|
}
|
|
211
225
|
|
|
212
226
|
template<typename T, typename C, typename A>
|
|
213
227
|
template<typename FwdT>
|
|
214
228
|
void quantiles_sketch<T, C, A>::update(FwdT&& item) {
|
|
215
|
-
if (!
|
|
229
|
+
if (!check_update_item(item)) { return; }
|
|
216
230
|
if (is_empty()) {
|
|
217
|
-
|
|
218
|
-
|
|
231
|
+
min_item_ = new (allocator_.allocate(1)) T(item);
|
|
232
|
+
max_item_ = new (allocator_.allocate(1)) T(item);
|
|
219
233
|
} else {
|
|
220
|
-
if (
|
|
221
|
-
if (
|
|
234
|
+
if (comparator_(item, *min_item_)) *min_item_ = item;
|
|
235
|
+
if (comparator_(*max_item_, item)) *max_item_ = item;
|
|
222
236
|
}
|
|
223
237
|
|
|
224
238
|
// if exceed capacity, grow until size 2k -- assumes eager processing
|
|
225
|
-
if (base_buffer_.size() + 1 > base_buffer_.capacity())
|
|
226
|
-
grow_base_buffer();
|
|
239
|
+
if (base_buffer_.size() + 1 > base_buffer_.capacity()) grow_base_buffer();
|
|
227
240
|
|
|
228
241
|
base_buffer_.push_back(std::forward<FwdT>(item));
|
|
229
242
|
++n_;
|
|
230
243
|
|
|
231
|
-
if (base_buffer_.size() > 1)
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
if (base_buffer_.size() == 2 * k_)
|
|
235
|
-
process_full_base_buffer();
|
|
244
|
+
if (base_buffer_.size() > 1) is_base_buffer_sorted_ = false;
|
|
245
|
+
if (base_buffer_.size() == 2 * k_) process_full_base_buffer();
|
|
246
|
+
reset_sorted_view();
|
|
236
247
|
}
|
|
237
248
|
|
|
238
249
|
template<typename T, typename C, typename A>
|
|
@@ -245,10 +256,11 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
|
|
|
245
256
|
for (auto item : other.base_buffer_) {
|
|
246
257
|
update(conditional_forward<FwdSk>(item));
|
|
247
258
|
}
|
|
248
|
-
|
|
259
|
+
reset_sorted_view();
|
|
260
|
+
return;
|
|
249
261
|
}
|
|
250
262
|
|
|
251
|
-
//
|
|
263
|
+
// other has data and is in estimation mode
|
|
252
264
|
if (is_estimation_mode()) {
|
|
253
265
|
if (k_ == other.get_k()) {
|
|
254
266
|
standard_merge(*this, other);
|
|
@@ -273,6 +285,7 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
|
|
|
273
285
|
}
|
|
274
286
|
*this = sk_copy;
|
|
275
287
|
}
|
|
288
|
+
reset_sorted_view();
|
|
276
289
|
}
|
|
277
290
|
|
|
278
291
|
template<typename T, typename C, typename A>
|
|
@@ -286,8 +299,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
|
|
|
286
299
|
write(os, family);
|
|
287
300
|
|
|
288
301
|
// side-effect: sort base buffer since always compact
|
|
289
|
-
|
|
290
|
-
|
|
302
|
+
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
|
|
303
|
+
const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
|
|
291
304
|
|
|
292
305
|
// empty, ordered, compact are valid flags
|
|
293
306
|
const uint8_t flags_byte(
|
|
@@ -304,8 +317,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
|
|
|
304
317
|
write(os, n_);
|
|
305
318
|
|
|
306
319
|
// min and max
|
|
307
|
-
serde.serialize(os,
|
|
308
|
-
serde.serialize(os,
|
|
320
|
+
serde.serialize(os, min_item_, 1);
|
|
321
|
+
serde.serialize(os, max_item_, 1);
|
|
309
322
|
|
|
310
323
|
// base buffer items
|
|
311
324
|
serde.serialize(os, base_buffer_.data(), static_cast<unsigned>(base_buffer_.size()));
|
|
@@ -334,8 +347,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
|
|
|
334
347
|
ptr += copy_to_mem(family, ptr);
|
|
335
348
|
|
|
336
349
|
// side-effect: sort base buffer since always compact
|
|
337
|
-
|
|
338
|
-
|
|
350
|
+
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
|
|
351
|
+
const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
|
|
339
352
|
|
|
340
353
|
// empty, ordered, compact are valid flags
|
|
341
354
|
const uint8_t flags_byte(
|
|
@@ -352,8 +365,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
|
|
|
352
365
|
ptr += copy_to_mem(n_, ptr);
|
|
353
366
|
|
|
354
367
|
// min and max
|
|
355
|
-
ptr += serde.serialize(ptr, end_ptr - ptr,
|
|
356
|
-
ptr += serde.serialize(ptr, end_ptr - ptr,
|
|
368
|
+
ptr += serde.serialize(ptr, end_ptr - ptr, min_item_, 1);
|
|
369
|
+
ptr += serde.serialize(ptr, end_ptr - ptr, max_item_, 1);
|
|
357
370
|
|
|
358
371
|
// base buffer items
|
|
359
372
|
if (base_buffer_.size() > 0)
|
|
@@ -371,7 +384,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
|
|
|
371
384
|
|
|
372
385
|
template<typename T, typename C, typename A>
|
|
373
386
|
template<typename SerDe>
|
|
374
|
-
auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde,
|
|
387
|
+
auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde,
|
|
388
|
+
const C& comparator, const A &allocator) -> quantiles_sketch {
|
|
375
389
|
const auto preamble_longs = read<uint8_t>(is);
|
|
376
390
|
const auto serial_version = read<uint8_t>(is);
|
|
377
391
|
const auto family_id = read<uint8_t>(is);
|
|
@@ -387,7 +401,7 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
|
|
|
387
401
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
388
402
|
const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
|
|
389
403
|
if (is_empty) {
|
|
390
|
-
return quantiles_sketch(k, allocator);
|
|
404
|
+
return quantiles_sketch(k, comparator, allocator);
|
|
391
405
|
}
|
|
392
406
|
|
|
393
407
|
const auto items_seen = read<uint64_t>(is);
|
|
@@ -397,17 +411,17 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
|
|
|
397
411
|
|
|
398
412
|
A alloc(allocator);
|
|
399
413
|
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
|
400
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
401
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
402
|
-
std::unique_ptr<T, item_deleter>
|
|
403
|
-
std::unique_ptr<T, item_deleter>
|
|
414
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
415
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
416
|
+
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
|
417
|
+
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
|
404
418
|
|
|
405
|
-
serde.deserialize(is,
|
|
419
|
+
serde.deserialize(is, min_item_buffer.get(), 1);
|
|
406
420
|
// serde call did not throw, repackage with destructor
|
|
407
|
-
|
|
408
|
-
serde.deserialize(is,
|
|
421
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
422
|
+
serde.deserialize(is, max_item_buffer.get(), 1);
|
|
409
423
|
// serde call did not throw, repackage with destructor
|
|
410
|
-
|
|
424
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
411
425
|
|
|
412
426
|
if (serial_version == 1) {
|
|
413
427
|
read<uint64_t>(is); // no longer used
|
|
@@ -449,7 +463,8 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
|
|
|
449
463
|
}
|
|
450
464
|
|
|
451
465
|
return quantiles_sketch(k, items_seen, bit_pattern,
|
|
452
|
-
std::move(base_buffer), std::move(levels), std::move(
|
|
466
|
+
std::move(base_buffer), std::move(levels), std::move(min_item), std::move(max_item), is_sorted,
|
|
467
|
+
comparator, allocator);
|
|
453
468
|
}
|
|
454
469
|
|
|
455
470
|
template<typename T, typename C, typename A>
|
|
@@ -473,7 +488,8 @@ auto quantiles_sketch<T, C, A>::deserialize_array(std::istream& is, uint32_t num
|
|
|
473
488
|
|
|
474
489
|
template<typename T, typename C, typename A>
|
|
475
490
|
template<typename SerDe>
|
|
476
|
-
auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& serde,
|
|
491
|
+
auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& serde,
|
|
492
|
+
const C& comparator, const A &allocator) -> quantiles_sketch {
|
|
477
493
|
ensure_minimum_memory(size, 8);
|
|
478
494
|
const char* ptr = static_cast<const char*>(bytes);
|
|
479
495
|
const char* end_ptr = static_cast<const char*>(bytes) + size;
|
|
@@ -498,7 +514,7 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
|
|
|
498
514
|
|
|
499
515
|
const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
|
|
500
516
|
if (is_empty) {
|
|
501
|
-
return quantiles_sketch(k, allocator);
|
|
517
|
+
return quantiles_sketch(k, comparator, allocator);
|
|
502
518
|
}
|
|
503
519
|
|
|
504
520
|
ensure_minimum_memory(size, 16);
|
|
@@ -510,17 +526,17 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
|
|
|
510
526
|
|
|
511
527
|
A alloc(allocator);
|
|
512
528
|
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
|
513
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
514
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
515
|
-
std::unique_ptr<T, item_deleter>
|
|
516
|
-
std::unique_ptr<T, item_deleter>
|
|
529
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
530
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
531
|
+
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
|
532
|
+
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
|
517
533
|
|
|
518
|
-
ptr += serde.deserialize(ptr, end_ptr - ptr,
|
|
534
|
+
ptr += serde.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
|
|
519
535
|
// serde call did not throw, repackage with destructor
|
|
520
|
-
|
|
521
|
-
ptr += serde.deserialize(ptr, end_ptr - ptr,
|
|
536
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
537
|
+
ptr += serde.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
|
|
522
538
|
// serde call did not throw, repackage with destructor
|
|
523
|
-
|
|
539
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
524
540
|
|
|
525
541
|
if (serial_version == 1) {
|
|
526
542
|
uint64_t unused_long;
|
|
@@ -567,7 +583,8 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
|
|
|
567
583
|
}
|
|
568
584
|
|
|
569
585
|
return quantiles_sketch(k, items_seen, bit_pattern,
|
|
570
|
-
std::move(base_buffer_pair.first), std::move(levels), std::move(
|
|
586
|
+
std::move(base_buffer_pair.first), std::move(levels), std::move(min_item), std::move(max_item), is_sorted,
|
|
587
|
+
comparator, allocator);
|
|
571
588
|
}
|
|
572
589
|
|
|
573
590
|
template<typename T, typename C, typename A>
|
|
@@ -605,11 +622,11 @@ string<A> quantiles_sketch<T, C, A>::to_string(bool print_levels, bool print_ite
|
|
|
605
622
|
os << " Empty : " << (is_empty() ? "true" : "false") << std::endl;
|
|
606
623
|
os << " Estimation mode: " << (is_estimation_mode() ? "true" : "false") << std::endl;
|
|
607
624
|
os << " Levels (w/o BB): " << levels_.size() << std::endl;
|
|
608
|
-
os << " Used Levels : " <<
|
|
625
|
+
os << " Used Levels : " << count_valid_levels(bit_pattern_) << std::endl;
|
|
609
626
|
os << " Retained items : " << get_num_retained() << std::endl;
|
|
610
627
|
if (!is_empty()) {
|
|
611
|
-
os << " Min
|
|
612
|
-
os << " Max
|
|
628
|
+
os << " Min item : " << *min_item_ << std::endl;
|
|
629
|
+
os << " Max item : " << *max_item_ << std::endl;
|
|
613
630
|
}
|
|
614
631
|
os << "### End sketch summary" << std::endl;
|
|
615
632
|
|
|
@@ -667,20 +684,20 @@ uint32_t quantiles_sketch<T, C, A>::get_num_retained() const {
|
|
|
667
684
|
}
|
|
668
685
|
|
|
669
686
|
template<typename T, typename C, typename A>
|
|
670
|
-
const T& quantiles_sketch<T, C, A>::
|
|
671
|
-
if (is_empty())
|
|
672
|
-
return *
|
|
687
|
+
const T& quantiles_sketch<T, C, A>::get_min_item() const {
|
|
688
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
689
|
+
return *min_item_;
|
|
673
690
|
}
|
|
674
691
|
|
|
675
692
|
template<typename T, typename C, typename A>
|
|
676
|
-
const T& quantiles_sketch<T, C, A>::
|
|
677
|
-
if (is_empty())
|
|
678
|
-
return *
|
|
693
|
+
const T& quantiles_sketch<T, C, A>::get_max_item() const {
|
|
694
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
695
|
+
return *max_item_;
|
|
679
696
|
}
|
|
680
697
|
|
|
681
698
|
template<typename T, typename C, typename A>
|
|
682
699
|
C quantiles_sketch<T, C, A>::get_comparator() const {
|
|
683
|
-
return
|
|
700
|
+
return comparator_;
|
|
684
701
|
}
|
|
685
702
|
|
|
686
703
|
template<typename T, typename C, typename A>
|
|
@@ -702,8 +719,8 @@ template<typename SerDe, typename TT, typename std::enable_if<!std::is_arithmeti
|
|
|
702
719
|
size_t quantiles_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& serde) const {
|
|
703
720
|
if (is_empty()) { return EMPTY_SIZE_BYTES; }
|
|
704
721
|
size_t size = DATA_START;
|
|
705
|
-
size += serde.size_of_item(*
|
|
706
|
-
size += serde.size_of_item(*
|
|
722
|
+
size += serde.size_of_item(*min_item_);
|
|
723
|
+
size += serde.size_of_item(*max_item_);
|
|
707
724
|
for (auto it: *this) size += serde.size_of_item(it.first);
|
|
708
725
|
return size;
|
|
709
726
|
}
|
|
@@ -721,162 +738,121 @@ double quantiles_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool is_
|
|
|
721
738
|
}
|
|
722
739
|
|
|
723
740
|
template<typename T, typename C, typename A>
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), C());
|
|
741
|
+
quantiles_sorted_view<T, C, A> quantiles_sketch<T, C, A>::get_sorted_view() const {
|
|
742
|
+
// allow side-effect of sorting the base buffer
|
|
743
|
+
if (!is_base_buffer_sorted_) {
|
|
744
|
+
std::sort(const_cast<Level&>(base_buffer_).begin(), const_cast<Level&>(base_buffer_).end(), comparator_);
|
|
745
|
+
const_cast<quantiles_sketch*>(this)->is_base_buffer_sorted_ = true;
|
|
730
746
|
}
|
|
731
|
-
|
|
747
|
+
quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
|
|
732
748
|
|
|
733
749
|
uint64_t weight = 1;
|
|
734
750
|
view.add(base_buffer_.begin(), base_buffer_.end(), weight);
|
|
735
|
-
for (auto& level
|
|
751
|
+
for (const auto& level: levels_) {
|
|
736
752
|
weight <<= 1;
|
|
737
753
|
if (level.empty()) { continue; }
|
|
738
754
|
view.add(level.begin(), level.end(), weight);
|
|
739
755
|
}
|
|
740
756
|
|
|
741
|
-
|
|
757
|
+
view.convert_to_cummulative();
|
|
742
758
|
return view;
|
|
743
759
|
}
|
|
744
760
|
|
|
745
761
|
template<typename T, typename C, typename A>
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
if (is_empty()) return get_invalid_value();
|
|
749
|
-
if (rank == 0.0) return *min_value_;
|
|
750
|
-
if (rank == 1.0) return *max_value_;
|
|
762
|
+
auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
|
|
763
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
751
764
|
if ((rank < 0.0) || (rank > 1.0)) {
|
|
752
|
-
throw std::invalid_argument("
|
|
765
|
+
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
|
|
753
766
|
}
|
|
754
767
|
// possible side-effect: sorting base buffer
|
|
755
|
-
|
|
768
|
+
setup_sorted_view();
|
|
769
|
+
return sorted_view_->get_quantile(rank, inclusive);
|
|
756
770
|
}
|
|
757
771
|
|
|
758
772
|
template<typename T, typename C, typename A>
|
|
759
|
-
|
|
760
|
-
|
|
773
|
+
std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
|
|
774
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
761
775
|
std::vector<T, A> quantiles(allocator_);
|
|
762
|
-
if (is_empty()) return quantiles;
|
|
763
776
|
quantiles.reserve(size);
|
|
764
777
|
|
|
765
778
|
// possible side-effect: sorting base buffer
|
|
766
|
-
|
|
779
|
+
setup_sorted_view();
|
|
767
780
|
|
|
768
781
|
for (uint32_t i = 0; i < size; ++i) {
|
|
769
782
|
const double rank = ranks[i];
|
|
770
783
|
if ((rank < 0.0) || (rank > 1.0)) {
|
|
771
|
-
throw std::invalid_argument("rank cannot be less than
|
|
772
|
-
}
|
|
773
|
-
if (rank == 0.0) quantiles.push_back(*min_value_);
|
|
774
|
-
else if (rank == 1.0) quantiles.push_back(*max_value_);
|
|
775
|
-
else {
|
|
776
|
-
quantiles.push_back(view.get_quantile(rank));
|
|
784
|
+
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
|
|
777
785
|
}
|
|
786
|
+
quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
|
|
778
787
|
}
|
|
779
788
|
return quantiles;
|
|
780
789
|
}
|
|
781
790
|
|
|
782
791
|
template<typename T, typename C, typename A>
|
|
783
|
-
|
|
784
|
-
std::
|
|
785
|
-
if (is_empty()) return std::vector<T, A>(allocator_);
|
|
792
|
+
std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
|
|
793
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
786
794
|
if (num == 0) {
|
|
787
795
|
throw std::invalid_argument("num must be > 0");
|
|
788
796
|
}
|
|
789
|
-
vector_double
|
|
790
|
-
|
|
797
|
+
vector_double ranks(num, 0, allocator_);
|
|
798
|
+
ranks[0] = 0.0;
|
|
791
799
|
for (size_t i = 1; i < num; i++) {
|
|
792
|
-
|
|
800
|
+
ranks[i] = static_cast<double>(i) / (num - 1);
|
|
793
801
|
}
|
|
794
802
|
if (num > 1) {
|
|
795
|
-
|
|
803
|
+
ranks[num - 1] = 1.0;
|
|
796
804
|
}
|
|
797
|
-
return get_quantiles
|
|
805
|
+
return get_quantiles(ranks.data(), num, inclusive);
|
|
798
806
|
}
|
|
799
807
|
|
|
800
808
|
template<typename T, typename C, typename A>
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
uint64_t total = 0;
|
|
806
|
-
for (const T &item: base_buffer_) {
|
|
807
|
-
if (inclusive ? !C()(value, item) : C()(item, value))
|
|
808
|
-
total += weight;
|
|
809
|
-
}
|
|
810
|
-
|
|
811
|
-
weight *= 2;
|
|
812
|
-
for (uint8_t level = 0; level < levels_.size(); ++level, weight *= 2) {
|
|
813
|
-
if (levels_[level].empty()) { continue; }
|
|
814
|
-
const T* data = levels_[level].data();
|
|
815
|
-
for (uint16_t i = 0; i < k_; ++i) {
|
|
816
|
-
if (inclusive ? !C()(value, data[i]) : C()(data[i], value))
|
|
817
|
-
total += weight;
|
|
818
|
-
else
|
|
819
|
-
break; // levels are sorted, no point comparing further
|
|
820
|
-
}
|
|
821
|
-
}
|
|
822
|
-
return (double) total / n_;
|
|
809
|
+
double quantiles_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
|
|
810
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
811
|
+
setup_sorted_view();
|
|
812
|
+
return sorted_view_->get_rank(item, inclusive);
|
|
823
813
|
}
|
|
824
814
|
|
|
825
815
|
template<typename T, typename C, typename A>
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
for (uint32_t i = size; i > 0; --i) {
|
|
831
|
-
buckets[i] -= buckets[i - 1];
|
|
832
|
-
}
|
|
833
|
-
return buckets;
|
|
816
|
+
auto quantiles_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
|
|
817
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
818
|
+
setup_sorted_view();
|
|
819
|
+
return sorted_view_->get_PMF(split_points, size, inclusive);
|
|
834
820
|
}
|
|
835
821
|
|
|
836
822
|
template<typename T, typename C, typename A>
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
check_split_points(split_points, size);
|
|
842
|
-
buckets.reserve(size + 1);
|
|
843
|
-
for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank<inclusive>(split_points[i]));
|
|
844
|
-
buckets.push_back(1);
|
|
845
|
-
return buckets;
|
|
823
|
+
auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
|
|
824
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
825
|
+
setup_sorted_view();
|
|
826
|
+
return sorted_view_->get_CDF(split_points, size, inclusive);
|
|
846
827
|
}
|
|
847
828
|
|
|
848
829
|
template<typename T, typename C, typename A>
|
|
849
|
-
uint32_t quantiles_sketch<T, C, A>::compute_retained_items(
|
|
830
|
+
uint32_t quantiles_sketch<T, C, A>::compute_retained_items(uint16_t k, uint64_t n) {
|
|
850
831
|
const uint32_t bb_count = compute_base_buffer_items(k, n);
|
|
851
832
|
const uint64_t bit_pattern = compute_bit_pattern(k, n);
|
|
852
|
-
const uint32_t valid_levels =
|
|
833
|
+
const uint32_t valid_levels = count_valid_levels(bit_pattern);
|
|
853
834
|
return bb_count + (k * valid_levels);
|
|
854
835
|
}
|
|
855
836
|
|
|
856
837
|
template<typename T, typename C, typename A>
|
|
857
|
-
uint32_t quantiles_sketch<T, C, A>::compute_base_buffer_items(
|
|
838
|
+
uint32_t quantiles_sketch<T, C, A>::compute_base_buffer_items(uint16_t k, uint64_t n) {
|
|
858
839
|
return n % (static_cast<uint64_t>(2) * k);
|
|
859
840
|
}
|
|
860
841
|
|
|
861
842
|
template<typename T, typename C, typename A>
|
|
862
|
-
uint64_t quantiles_sketch<T, C, A>::compute_bit_pattern(
|
|
843
|
+
uint64_t quantiles_sketch<T, C, A>::compute_bit_pattern(uint16_t k, uint64_t n) {
|
|
863
844
|
return n / (static_cast<uint64_t>(2) * k);
|
|
864
845
|
}
|
|
865
846
|
|
|
866
847
|
template<typename T, typename C, typename A>
|
|
867
|
-
uint32_t quantiles_sketch<T, C, A>::
|
|
868
|
-
// TODO: Java's Long.bitCount() probably uses a better method
|
|
869
|
-
uint64_t bp = bit_pattern;
|
|
848
|
+
uint32_t quantiles_sketch<T, C, A>::count_valid_levels(uint64_t bit_pattern) {
|
|
870
849
|
uint32_t count = 0;
|
|
871
|
-
|
|
872
|
-
if ((bp & 0x01) == 1) ++count;
|
|
873
|
-
bp >>= 1;
|
|
874
|
-
}
|
|
850
|
+
for (; bit_pattern > 0; ++count) bit_pattern &= bit_pattern - 1;
|
|
875
851
|
return count;
|
|
876
852
|
}
|
|
877
853
|
|
|
878
854
|
template<typename T, typename C, typename A>
|
|
879
|
-
uint8_t quantiles_sketch<T, C, A>::compute_levels_needed(
|
|
855
|
+
uint8_t quantiles_sketch<T, C, A>::compute_levels_needed(uint16_t k, uint64_t n) {
|
|
880
856
|
return static_cast<uint8_t>(64U) - count_leading_zeros_in_u64(n / (2 * k));
|
|
881
857
|
}
|
|
882
858
|
|
|
@@ -961,13 +937,13 @@ void quantiles_sketch<T, C, A>::process_full_base_buffer() {
|
|
|
961
937
|
// make sure there will be enough levels for the propagation
|
|
962
938
|
grow_levels_if_needed(); // note: n_ was already incremented by update() before this
|
|
963
939
|
|
|
964
|
-
std::sort(base_buffer_.begin(), base_buffer_.end(),
|
|
940
|
+
std::sort(base_buffer_.begin(), base_buffer_.end(), comparator_);
|
|
965
941
|
in_place_propagate_carry(0,
|
|
966
942
|
levels_[0], // unused here, but 0 is guaranteed to exist
|
|
967
943
|
base_buffer_,
|
|
968
944
|
true, *this);
|
|
969
945
|
base_buffer_.clear();
|
|
970
|
-
|
|
946
|
+
is_base_buffer_sorted_ = true;
|
|
971
947
|
if (n_ / (2 * k_) != bit_pattern_) {
|
|
972
948
|
throw std::logic_error("Internal error: n / 2k (" + std::to_string(n_ / 2 * k_)
|
|
973
949
|
+ " != bit_pattern " + std::to_string(bit_pattern_));
|
|
@@ -1019,7 +995,7 @@ void quantiles_sketch<T, C, A>::in_place_propagate_carry(uint8_t starting_level,
|
|
|
1019
995
|
merge_two_size_k_buffers(
|
|
1020
996
|
sketch.levels_[lvl],
|
|
1021
997
|
sketch.levels_[ending_level],
|
|
1022
|
-
buf_size_2k);
|
|
998
|
+
buf_size_2k, sketch.get_comparator());
|
|
1023
999
|
sketch.levels_[lvl].clear();
|
|
1024
1000
|
sketch.levels_[ending_level].clear();
|
|
1025
1001
|
zip_buffer(buf_size_2k, sketch.levels_[ending_level]);
|
|
@@ -1071,9 +1047,9 @@ void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf
|
|
|
1071
1047
|
// do not clear input buffer
|
|
1072
1048
|
}
|
|
1073
1049
|
|
|
1074
|
-
|
|
1075
1050
|
template<typename T, typename C, typename A>
|
|
1076
|
-
void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& src_2,
|
|
1051
|
+
void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& src_2,
|
|
1052
|
+
Level& dst, const C& comparator) {
|
|
1077
1053
|
if (src_1.size() != src_2.size()
|
|
1078
1054
|
|| src_1.size() * 2 != dst.capacity()
|
|
1079
1055
|
|| dst.size() != 0) {
|
|
@@ -1085,7 +1061,7 @@ void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& sr
|
|
|
1085
1061
|
|
|
1086
1062
|
// TODO: probably actually doing copies given Level&?
|
|
1087
1063
|
while (it1 != end1 && it2 != end2) {
|
|
1088
|
-
if (
|
|
1064
|
+
if (comparator(*it1, *it2)) {
|
|
1089
1065
|
dst.push_back(std::move(*it1++));
|
|
1090
1066
|
} else {
|
|
1091
1067
|
dst.push_back(std::move(*it2++));
|
|
@@ -1100,7 +1076,6 @@ void quantiles_sketch<T, C, A>::merge_two_size_k_buffers(Level& src_1, Level& sr
|
|
|
1100
1076
|
}
|
|
1101
1077
|
}
|
|
1102
1078
|
|
|
1103
|
-
|
|
1104
1079
|
template<typename T, typename C, typename A>
|
|
1105
1080
|
template<typename FwdSk>
|
|
1106
1081
|
void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& src) {
|
|
@@ -1149,25 +1124,24 @@ void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& sr
|
|
|
1149
1124
|
throw std::logic_error("Failed internal consistency check after standard_merge()");
|
|
1150
1125
|
}
|
|
1151
1126
|
|
|
1152
|
-
// update min and max
|
|
1127
|
+
// update min and max items
|
|
1153
1128
|
// can't just check is_empty() since min and max might not have been set if
|
|
1154
1129
|
// there were no base buffer items added via update()
|
|
1155
|
-
if (tgt.
|
|
1156
|
-
tgt.
|
|
1130
|
+
if (tgt.min_item_ == nullptr) {
|
|
1131
|
+
tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
|
|
1157
1132
|
} else {
|
|
1158
|
-
if (
|
|
1159
|
-
*tgt.
|
|
1133
|
+
if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
|
|
1134
|
+
*tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
|
|
1160
1135
|
}
|
|
1161
1136
|
|
|
1162
|
-
if (tgt.
|
|
1163
|
-
tgt.
|
|
1137
|
+
if (tgt.max_item_ == nullptr) {
|
|
1138
|
+
tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
|
|
1164
1139
|
} else {
|
|
1165
|
-
if (
|
|
1166
|
-
*tgt.
|
|
1140
|
+
if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
|
|
1141
|
+
*tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
|
|
1167
1142
|
}
|
|
1168
1143
|
}
|
|
1169
1144
|
|
|
1170
|
-
|
|
1171
1145
|
template<typename T, typename C, typename A>
|
|
1172
1146
|
template<typename FwdSk>
|
|
1173
1147
|
void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&& src) {
|
|
@@ -1226,25 +1200,24 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
|
|
|
1226
1200
|
throw std::logic_error("Failed internal consistency check after downsampling_merge()");
|
|
1227
1201
|
}
|
|
1228
1202
|
|
|
1229
|
-
// update min and max
|
|
1203
|
+
// update min and max items
|
|
1230
1204
|
// can't just check is_empty() since min and max might not have been set if
|
|
1231
1205
|
// there were no base buffer items added via update()
|
|
1232
|
-
if (tgt.
|
|
1233
|
-
tgt.
|
|
1206
|
+
if (tgt.min_item_ == nullptr) {
|
|
1207
|
+
tgt.min_item_ = new (tgt.allocator_.allocate(1)) T(*src.min_item_);
|
|
1234
1208
|
} else {
|
|
1235
|
-
if (
|
|
1236
|
-
*tgt.
|
|
1209
|
+
if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
|
|
1210
|
+
*tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
|
|
1237
1211
|
}
|
|
1238
1212
|
|
|
1239
|
-
if (tgt.
|
|
1240
|
-
tgt.
|
|
1213
|
+
if (tgt.max_item_ == nullptr) {
|
|
1214
|
+
tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
|
|
1241
1215
|
} else {
|
|
1242
|
-
if (
|
|
1243
|
-
*tgt.
|
|
1216
|
+
if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
|
|
1217
|
+
*tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
|
|
1244
1218
|
}
|
|
1245
1219
|
}
|
|
1246
1220
|
|
|
1247
|
-
|
|
1248
1221
|
template<typename T, typename C, typename A>
|
|
1249
1222
|
uint8_t quantiles_sketch<T, C, A>::lowest_zero_bit_starting_at(uint64_t bits, uint8_t starting_bit) {
|
|
1250
1223
|
uint8_t pos = starting_bit & 0X3F;
|
|
@@ -1292,6 +1265,23 @@ class quantiles_sketch<T, C, A>::items_deleter {
|
|
|
1292
1265
|
size_t num_;
|
|
1293
1266
|
};
|
|
1294
1267
|
|
|
1268
|
+
template<typename T, typename C, typename A>
|
|
1269
|
+
void quantiles_sketch<T, C, A>::setup_sorted_view() const {
|
|
1270
|
+
if (sorted_view_ == nullptr) {
|
|
1271
|
+
using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
|
|
1272
|
+
sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
|
|
1273
|
+
}
|
|
1274
|
+
}
|
|
1275
|
+
|
|
1276
|
+
template<typename T, typename C, typename A>
|
|
1277
|
+
void quantiles_sketch<T, C, A>::reset_sorted_view() {
|
|
1278
|
+
if (sorted_view_ != nullptr) {
|
|
1279
|
+
sorted_view_->~quantiles_sorted_view();
|
|
1280
|
+
using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
|
|
1281
|
+
AllocSortedView(allocator_).deallocate(sorted_view_, 1);
|
|
1282
|
+
sorted_view_ = nullptr;
|
|
1283
|
+
}
|
|
1284
|
+
}
|
|
1295
1285
|
|
|
1296
1286
|
// quantiles_sketch::const_iterator implementation
|
|
1297
1287
|
|
|
@@ -1364,8 +1354,13 @@ bool quantiles_sketch<T, C, A>::const_iterator::operator!=(const const_iterator&
|
|
|
1364
1354
|
}
|
|
1365
1355
|
|
|
1366
1356
|
template<typename T, typename C, typename A>
|
|
1367
|
-
|
|
1368
|
-
return
|
|
1357
|
+
auto quantiles_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
|
|
1358
|
+
return value_type(level_ == -1 ? base_buffer_[index_] : levels_[level_][index_], weight_);
|
|
1359
|
+
}
|
|
1360
|
+
|
|
1361
|
+
template<typename T, typename C, typename A>
|
|
1362
|
+
auto quantiles_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
|
|
1363
|
+
return **this;
|
|
1369
1364
|
}
|
|
1370
1365
|
|
|
1371
1366
|
} /* namespace datasketches */
|