datasketches 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
|
@@ -25,8 +25,9 @@
|
|
|
25
25
|
|
|
26
26
|
namespace datasketches {
|
|
27
27
|
|
|
28
|
-
template<typename T, typename C, typename
|
|
29
|
-
req_sketch<T, C,
|
|
28
|
+
template<typename T, typename C, typename A>
|
|
29
|
+
req_sketch<T, C, A>::req_sketch(uint16_t k, bool hra, const C& comparator, const A& allocator):
|
|
30
|
+
comparator_(comparator),
|
|
30
31
|
allocator_(allocator),
|
|
31
32
|
k_(std::max<uint8_t>(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
|
|
32
33
|
hra_(hra),
|
|
@@ -34,26 +35,29 @@ max_nom_size_(0),
|
|
|
34
35
|
num_retained_(0),
|
|
35
36
|
n_(0),
|
|
36
37
|
compactors_(allocator),
|
|
37
|
-
|
|
38
|
-
|
|
38
|
+
min_item_(nullptr),
|
|
39
|
+
max_item_(nullptr),
|
|
40
|
+
sorted_view_(nullptr)
|
|
39
41
|
{
|
|
40
42
|
grow();
|
|
41
43
|
}
|
|
42
44
|
|
|
43
|
-
template<typename T, typename C, typename
|
|
44
|
-
req_sketch<T, C,
|
|
45
|
-
if (
|
|
46
|
-
|
|
47
|
-
allocator_.deallocate(
|
|
45
|
+
template<typename T, typename C, typename A>
|
|
46
|
+
req_sketch<T, C, A>::~req_sketch() {
|
|
47
|
+
if (min_item_ != nullptr) {
|
|
48
|
+
min_item_->~T();
|
|
49
|
+
allocator_.deallocate(min_item_, 1);
|
|
48
50
|
}
|
|
49
|
-
if (
|
|
50
|
-
|
|
51
|
-
allocator_.deallocate(
|
|
51
|
+
if (max_item_ != nullptr) {
|
|
52
|
+
max_item_->~T();
|
|
53
|
+
allocator_.deallocate(max_item_, 1);
|
|
52
54
|
}
|
|
55
|
+
reset_sorted_view();
|
|
53
56
|
}
|
|
54
57
|
|
|
55
|
-
template<typename T, typename C, typename
|
|
56
|
-
req_sketch<T, C,
|
|
58
|
+
template<typename T, typename C, typename A>
|
|
59
|
+
req_sketch<T, C, A>::req_sketch(const req_sketch& other):
|
|
60
|
+
comparator_(other.comparator_),
|
|
57
61
|
allocator_(other.allocator_),
|
|
58
62
|
k_(other.k_),
|
|
59
63
|
hra_(other.hra_),
|
|
@@ -61,15 +65,17 @@ max_nom_size_(other.max_nom_size_),
|
|
|
61
65
|
num_retained_(other.num_retained_),
|
|
62
66
|
n_(other.n_),
|
|
63
67
|
compactors_(other.compactors_),
|
|
64
|
-
|
|
65
|
-
|
|
68
|
+
min_item_(nullptr),
|
|
69
|
+
max_item_(nullptr),
|
|
70
|
+
sorted_view_(nullptr)
|
|
66
71
|
{
|
|
67
|
-
if (other.
|
|
68
|
-
if (other.
|
|
72
|
+
if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
|
|
73
|
+
if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
|
|
69
74
|
}
|
|
70
75
|
|
|
71
|
-
template<typename T, typename C, typename
|
|
72
|
-
req_sketch<T, C,
|
|
76
|
+
template<typename T, typename C, typename A>
|
|
77
|
+
req_sketch<T, C, A>::req_sketch(req_sketch&& other) noexcept :
|
|
78
|
+
comparator_(std::move(other.comparator_)),
|
|
73
79
|
allocator_(std::move(other.allocator_)),
|
|
74
80
|
k_(other.k_),
|
|
75
81
|
hra_(other.hra_),
|
|
@@ -77,16 +83,18 @@ max_nom_size_(other.max_nom_size_),
|
|
|
77
83
|
num_retained_(other.num_retained_),
|
|
78
84
|
n_(other.n_),
|
|
79
85
|
compactors_(std::move(other.compactors_)),
|
|
80
|
-
|
|
81
|
-
|
|
86
|
+
min_item_(other.min_item_),
|
|
87
|
+
max_item_(other.max_item_),
|
|
88
|
+
sorted_view_(nullptr)
|
|
82
89
|
{
|
|
83
|
-
other.
|
|
84
|
-
other.
|
|
90
|
+
other.min_item_ = nullptr;
|
|
91
|
+
other.max_item_ = nullptr;
|
|
85
92
|
}
|
|
86
93
|
|
|
87
|
-
template<typename T, typename C, typename
|
|
88
|
-
req_sketch<T, C,
|
|
94
|
+
template<typename T, typename C, typename A>
|
|
95
|
+
req_sketch<T, C, A>& req_sketch<T, C, A>::operator=(const req_sketch& other) {
|
|
89
96
|
req_sketch copy(other);
|
|
97
|
+
std::swap(comparator_, copy.comparator_);
|
|
90
98
|
std::swap(allocator_, copy.allocator_);
|
|
91
99
|
std::swap(k_, copy.k_);
|
|
92
100
|
std::swap(hra_, copy.hra_);
|
|
@@ -94,13 +102,15 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(const req_sketch& othe
|
|
|
94
102
|
std::swap(num_retained_, copy.num_retained_);
|
|
95
103
|
std::swap(n_, copy.n_);
|
|
96
104
|
std::swap(compactors_, copy.compactors_);
|
|
97
|
-
std::swap(
|
|
98
|
-
std::swap(
|
|
105
|
+
std::swap(min_item_, copy.min_item_);
|
|
106
|
+
std::swap(max_item_, copy.max_item_);
|
|
107
|
+
reset_sorted_view();
|
|
99
108
|
return *this;
|
|
100
109
|
}
|
|
101
110
|
|
|
102
|
-
template<typename T, typename C, typename
|
|
103
|
-
req_sketch<T, C,
|
|
111
|
+
template<typename T, typename C, typename A>
|
|
112
|
+
req_sketch<T, C, A>& req_sketch<T, C, A>::operator=(req_sketch&& other) {
|
|
113
|
+
std::swap(comparator_, other.comparator_);
|
|
104
114
|
std::swap(allocator_, other.allocator_);
|
|
105
115
|
std::swap(k_, other.k_);
|
|
106
116
|
std::swap(hra_, other.hra_);
|
|
@@ -108,14 +118,16 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
|
|
|
108
118
|
std::swap(num_retained_, other.num_retained_);
|
|
109
119
|
std::swap(n_, other.n_);
|
|
110
120
|
std::swap(compactors_, other.compactors_);
|
|
111
|
-
std::swap(
|
|
112
|
-
std::swap(
|
|
121
|
+
std::swap(min_item_, other.min_item_);
|
|
122
|
+
std::swap(max_item_, other.max_item_);
|
|
123
|
+
reset_sorted_view();
|
|
113
124
|
return *this;
|
|
114
125
|
}
|
|
115
126
|
|
|
116
|
-
template<typename T, typename C, typename
|
|
117
|
-
template<typename TT, typename CC, typename
|
|
118
|
-
req_sketch<T, C,
|
|
127
|
+
template<typename T, typename C, typename A>
|
|
128
|
+
template<typename TT, typename CC, typename AA>
|
|
129
|
+
req_sketch<T, C, A>::req_sketch(const req_sketch<TT, CC, AA>& other, const C& comparator, const A& allocator):
|
|
130
|
+
comparator_(comparator),
|
|
119
131
|
allocator_(allocator),
|
|
120
132
|
k_(other.k_),
|
|
121
133
|
hra_(other.hra_),
|
|
@@ -123,8 +135,9 @@ max_nom_size_(other.max_nom_size_),
|
|
|
123
135
|
num_retained_(other.num_retained_),
|
|
124
136
|
n_(other.n_),
|
|
125
137
|
compactors_(allocator),
|
|
126
|
-
|
|
127
|
-
|
|
138
|
+
min_item_(nullptr),
|
|
139
|
+
max_item_(nullptr),
|
|
140
|
+
sorted_view_(nullptr)
|
|
128
141
|
{
|
|
129
142
|
static_assert(
|
|
130
143
|
std::is_constructible<T, TT>::value,
|
|
@@ -132,72 +145,73 @@ max_value_(nullptr)
|
|
|
132
145
|
);
|
|
133
146
|
compactors_.reserve(other.compactors_.size());
|
|
134
147
|
for (const auto& compactor: other.compactors_) {
|
|
135
|
-
compactors_.push_back(req_compactor<T, C, A>(compactor, allocator_));
|
|
148
|
+
compactors_.push_back(req_compactor<T, C, A>(compactor, comparator_, allocator_));
|
|
136
149
|
}
|
|
137
150
|
if (!other.is_empty()) {
|
|
138
|
-
|
|
139
|
-
|
|
151
|
+
min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
|
|
152
|
+
max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
|
|
140
153
|
}
|
|
141
154
|
}
|
|
142
155
|
|
|
143
|
-
template<typename T, typename C, typename
|
|
144
|
-
uint16_t req_sketch<T, C,
|
|
156
|
+
template<typename T, typename C, typename A>
|
|
157
|
+
uint16_t req_sketch<T, C, A>::get_k() const {
|
|
145
158
|
return k_;
|
|
146
159
|
}
|
|
147
160
|
|
|
148
|
-
template<typename T, typename C, typename
|
|
149
|
-
bool req_sketch<T, C,
|
|
161
|
+
template<typename T, typename C, typename A>
|
|
162
|
+
bool req_sketch<T, C, A>::is_HRA() const {
|
|
150
163
|
return hra_;
|
|
151
164
|
}
|
|
152
165
|
|
|
153
|
-
template<typename T, typename C, typename
|
|
154
|
-
bool req_sketch<T, C,
|
|
166
|
+
template<typename T, typename C, typename A>
|
|
167
|
+
bool req_sketch<T, C, A>::is_empty() const {
|
|
155
168
|
return n_ == 0;
|
|
156
169
|
}
|
|
157
170
|
|
|
158
|
-
template<typename T, typename C, typename
|
|
159
|
-
uint64_t req_sketch<T, C,
|
|
171
|
+
template<typename T, typename C, typename A>
|
|
172
|
+
uint64_t req_sketch<T, C, A>::get_n() const {
|
|
160
173
|
return n_;
|
|
161
174
|
}
|
|
162
175
|
|
|
163
|
-
template<typename T, typename C, typename
|
|
164
|
-
uint32_t req_sketch<T, C,
|
|
176
|
+
template<typename T, typename C, typename A>
|
|
177
|
+
uint32_t req_sketch<T, C, A>::get_num_retained() const {
|
|
165
178
|
return num_retained_;
|
|
166
179
|
}
|
|
167
180
|
|
|
168
|
-
template<typename T, typename C, typename
|
|
169
|
-
bool req_sketch<T, C,
|
|
181
|
+
template<typename T, typename C, typename A>
|
|
182
|
+
bool req_sketch<T, C, A>::is_estimation_mode() const {
|
|
170
183
|
return compactors_.size() > 1;
|
|
171
184
|
}
|
|
172
185
|
|
|
173
|
-
template<typename T, typename C, typename
|
|
186
|
+
template<typename T, typename C, typename A>
|
|
174
187
|
template<typename FwdT>
|
|
175
|
-
void req_sketch<T, C,
|
|
176
|
-
if (!
|
|
188
|
+
void req_sketch<T, C, A>::update(FwdT&& item) {
|
|
189
|
+
if (!check_update_item(item)) { return; }
|
|
177
190
|
if (is_empty()) {
|
|
178
|
-
|
|
179
|
-
|
|
191
|
+
min_item_ = new (allocator_.allocate(1)) T(item);
|
|
192
|
+
max_item_ = new (allocator_.allocate(1)) T(item);
|
|
180
193
|
} else {
|
|
181
|
-
if (
|
|
182
|
-
if (
|
|
194
|
+
if (comparator_(item, *min_item_)) *min_item_ = item;
|
|
195
|
+
if (comparator_(*max_item_, item)) *max_item_ = item;
|
|
183
196
|
}
|
|
184
197
|
compactors_[0].append(std::forward<FwdT>(item));
|
|
185
198
|
++num_retained_;
|
|
186
199
|
++n_;
|
|
187
200
|
if (num_retained_ == max_nom_size_) compress();
|
|
201
|
+
reset_sorted_view();
|
|
188
202
|
}
|
|
189
203
|
|
|
190
|
-
template<typename T, typename C, typename
|
|
204
|
+
template<typename T, typename C, typename A>
|
|
191
205
|
template<typename FwdSk>
|
|
192
|
-
void req_sketch<T, C,
|
|
206
|
+
void req_sketch<T, C, A>::merge(FwdSk&& other) {
|
|
193
207
|
if (is_HRA() != other.is_HRA()) throw std::invalid_argument("merging HRA and LRA is not valid");
|
|
194
208
|
if (other.is_empty()) return;
|
|
195
209
|
if (is_empty()) {
|
|
196
|
-
|
|
197
|
-
|
|
210
|
+
min_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_item_));
|
|
211
|
+
max_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_item_));
|
|
198
212
|
} else {
|
|
199
|
-
if (
|
|
200
|
-
if (
|
|
213
|
+
if (comparator_(*other.min_item_, *min_item_)) *min_item_ = conditional_forward<FwdSk>(*other.min_item_);
|
|
214
|
+
if (comparator_(*max_item_, *other.max_item_)) *max_item_ = conditional_forward<FwdSk>(*other.max_item_);
|
|
201
215
|
}
|
|
202
216
|
// grow until this has at least as many compactors as other
|
|
203
217
|
while (get_num_levels() < other.get_num_levels()) grow();
|
|
@@ -209,128 +223,117 @@ void req_sketch<T, C, S, A>::merge(FwdSk&& other) {
|
|
|
209
223
|
update_max_nom_size();
|
|
210
224
|
update_num_retained();
|
|
211
225
|
if (num_retained_ >= max_nom_size_) compress();
|
|
226
|
+
reset_sorted_view();
|
|
212
227
|
}
|
|
213
228
|
|
|
214
|
-
template<typename T, typename C, typename
|
|
215
|
-
const T& req_sketch<T, C,
|
|
216
|
-
if (is_empty())
|
|
217
|
-
return *
|
|
229
|
+
template<typename T, typename C, typename A>
|
|
230
|
+
const T& req_sketch<T, C, A>::get_min_item() const {
|
|
231
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
232
|
+
return *min_item_;
|
|
218
233
|
}
|
|
219
234
|
|
|
220
|
-
template<typename T, typename C, typename
|
|
221
|
-
const T& req_sketch<T, C,
|
|
222
|
-
if (is_empty())
|
|
223
|
-
return *
|
|
235
|
+
template<typename T, typename C, typename A>
|
|
236
|
+
const T& req_sketch<T, C, A>::get_max_item() const {
|
|
237
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
238
|
+
return *max_item_;
|
|
224
239
|
}
|
|
225
240
|
|
|
226
|
-
template<typename T, typename C, typename
|
|
227
|
-
C req_sketch<T, C,
|
|
228
|
-
return
|
|
241
|
+
template<typename T, typename C, typename A>
|
|
242
|
+
C req_sketch<T, C, A>::get_comparator() const {
|
|
243
|
+
return comparator_;
|
|
229
244
|
}
|
|
230
245
|
|
|
231
|
-
template<typename T, typename C, typename
|
|
232
|
-
|
|
233
|
-
|
|
246
|
+
template<typename T, typename C, typename A>
|
|
247
|
+
A req_sketch<T, C, A>::get_allocator() const {
|
|
248
|
+
return allocator_;
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
template<typename T, typename C, typename A>
|
|
252
|
+
double req_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
|
|
253
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
234
254
|
uint64_t weight = 0;
|
|
235
255
|
for (const auto& compactor: compactors_) {
|
|
236
|
-
weight += compactor.
|
|
256
|
+
weight += compactor.compute_weight(item, inclusive);
|
|
237
257
|
}
|
|
238
258
|
return static_cast<double>(weight) / n_;
|
|
239
259
|
}
|
|
240
260
|
|
|
241
|
-
template<typename T, typename C, typename
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
for (uint32_t i = size; i > 0; --i) {
|
|
247
|
-
buckets[i] -= buckets[i - 1];
|
|
248
|
-
}
|
|
249
|
-
return buckets;
|
|
261
|
+
template<typename T, typename C, typename A>
|
|
262
|
+
auto req_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
|
|
263
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
264
|
+
setup_sorted_view();
|
|
265
|
+
return sorted_view_->get_PMF(split_points, size, inclusive);
|
|
250
266
|
}
|
|
251
267
|
|
|
252
|
-
template<typename T, typename C, typename
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
check_split_points(split_points, size);
|
|
258
|
-
buckets.reserve(size + 1);
|
|
259
|
-
for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank<inclusive>(split_points[i]));
|
|
260
|
-
buckets.push_back(1);
|
|
261
|
-
return buckets;
|
|
268
|
+
template<typename T, typename C, typename A>
|
|
269
|
+
auto req_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
|
|
270
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
271
|
+
setup_sorted_view();
|
|
272
|
+
return sorted_view_->get_CDF(split_points, size, inclusive);
|
|
262
273
|
}
|
|
263
274
|
|
|
264
|
-
template<typename T, typename C, typename
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
if (is_empty()) return get_invalid_value();
|
|
268
|
-
if (rank == 0.0) return *min_value_;
|
|
269
|
-
if (rank == 1.0) return *max_value_;
|
|
275
|
+
template<typename T, typename C, typename A>
|
|
276
|
+
auto req_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
|
|
277
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
270
278
|
if ((rank < 0.0) || (rank > 1.0)) {
|
|
271
|
-
throw std::invalid_argument("
|
|
279
|
+
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
|
|
272
280
|
}
|
|
273
281
|
// possible side-effect of sorting level zero
|
|
274
|
-
|
|
282
|
+
setup_sorted_view();
|
|
283
|
+
return sorted_view_->get_quantile(rank, inclusive);
|
|
275
284
|
}
|
|
276
285
|
|
|
277
|
-
template<typename T, typename C, typename
|
|
278
|
-
|
|
279
|
-
|
|
286
|
+
template<typename T, typename C, typename A>
|
|
287
|
+
std::vector<T, A> req_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
|
|
288
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
280
289
|
std::vector<T, A> quantiles(allocator_);
|
|
281
|
-
if (is_empty()) return quantiles;
|
|
282
290
|
quantiles.reserve(size);
|
|
283
291
|
|
|
284
292
|
// possible side-effect of sorting level zero
|
|
285
|
-
|
|
293
|
+
setup_sorted_view();
|
|
286
294
|
|
|
287
295
|
for (uint32_t i = 0; i < size; ++i) {
|
|
288
296
|
const double rank = ranks[i];
|
|
289
297
|
if ((rank < 0.0) || (rank > 1.0)) {
|
|
290
|
-
throw std::invalid_argument("rank cannot be less than
|
|
291
|
-
}
|
|
292
|
-
if (rank == 0.0) quantiles.push_back(*min_value_);
|
|
293
|
-
else if (rank == 1.0) quantiles.push_back(*max_value_);
|
|
294
|
-
else {
|
|
295
|
-
quantiles.push_back(view.get_quantile(rank));
|
|
298
|
+
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
|
|
296
299
|
}
|
|
300
|
+
quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
|
|
297
301
|
}
|
|
298
302
|
return quantiles;
|
|
299
303
|
}
|
|
300
304
|
|
|
301
|
-
template<typename T, typename C, typename
|
|
302
|
-
|
|
303
|
-
quantile_sketch_sorted_view<T, C, A> req_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
|
|
305
|
+
template<typename T, typename C, typename A>
|
|
306
|
+
quantiles_sorted_view<T, C, A> req_sketch<T, C, A>::get_sorted_view() const {
|
|
304
307
|
if (!compactors_[0].is_sorted()) {
|
|
305
308
|
const_cast<Compactor&>(compactors_[0]).sort(); // allow this side effect
|
|
306
309
|
}
|
|
307
|
-
|
|
310
|
+
quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
|
|
308
311
|
|
|
309
312
|
for (auto& compactor: compactors_) {
|
|
310
313
|
view.add(compactor.begin(), compactor.end(), 1 << compactor.get_lg_weight());
|
|
311
314
|
}
|
|
312
315
|
|
|
313
|
-
|
|
316
|
+
view.convert_to_cummulative();
|
|
314
317
|
return view;
|
|
315
318
|
}
|
|
316
319
|
|
|
317
|
-
template<typename T, typename C, typename
|
|
318
|
-
double req_sketch<T, C,
|
|
320
|
+
template<typename T, typename C, typename A>
|
|
321
|
+
double req_sketch<T, C, A>::get_rank_lower_bound(double rank, uint8_t num_std_dev) const {
|
|
319
322
|
return get_rank_lb(get_k(), get_num_levels(), rank, num_std_dev, get_n(), hra_);
|
|
320
323
|
}
|
|
321
324
|
|
|
322
|
-
template<typename T, typename C, typename
|
|
323
|
-
double req_sketch<T, C,
|
|
325
|
+
template<typename T, typename C, typename A>
|
|
326
|
+
double req_sketch<T, C, A>::get_rank_upper_bound(double rank, uint8_t num_std_dev) const {
|
|
324
327
|
return get_rank_ub(get_k(), get_num_levels(), rank, num_std_dev, get_n(), hra_);
|
|
325
328
|
}
|
|
326
329
|
|
|
327
|
-
template<typename T, typename C, typename
|
|
328
|
-
double req_sketch<T, C,
|
|
330
|
+
template<typename T, typename C, typename A>
|
|
331
|
+
double req_sketch<T, C, A>::get_RSE(uint16_t k, double rank, bool hra, uint64_t n) {
|
|
329
332
|
return get_rank_lb(k, 2, rank, 1, n, hra);
|
|
330
333
|
}
|
|
331
334
|
|
|
332
|
-
template<typename T, typename C, typename
|
|
333
|
-
double req_sketch<T, C,
|
|
335
|
+
template<typename T, typename C, typename A>
|
|
336
|
+
double req_sketch<T, C, A>::get_rank_lb(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
|
|
334
337
|
if (is_exact_rank(k, num_levels, rank, n, hra)) return rank;
|
|
335
338
|
const double relative = relative_rse_factor() / k * (hra ? 1.0 - rank : rank);
|
|
336
339
|
const double fixed = FIXED_RSE_FACTOR / k;
|
|
@@ -339,8 +342,8 @@ double req_sketch<T, C, S, A>::get_rank_lb(uint16_t k, uint8_t num_levels, doubl
|
|
|
339
342
|
return std::max(lb_rel, lb_fix);
|
|
340
343
|
}
|
|
341
344
|
|
|
342
|
-
template<typename T, typename C, typename
|
|
343
|
-
double req_sketch<T, C,
|
|
345
|
+
template<typename T, typename C, typename A>
|
|
346
|
+
double req_sketch<T, C, A>::get_rank_ub(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
|
|
344
347
|
if (is_exact_rank(k, num_levels, rank, n, hra)) return rank;
|
|
345
348
|
const double relative = relative_rse_factor() / k * (hra ? 1.0 - rank : rank);
|
|
346
349
|
const double fixed = FIXED_RSE_FACTOR / k;
|
|
@@ -349,23 +352,23 @@ double req_sketch<T, C, S, A>::get_rank_ub(uint16_t k, uint8_t num_levels, doubl
|
|
|
349
352
|
return std::min(ub_rel, ub_fix);
|
|
350
353
|
}
|
|
351
354
|
|
|
352
|
-
template<typename T, typename C, typename
|
|
353
|
-
bool req_sketch<T, C,
|
|
355
|
+
template<typename T, typename C, typename A>
|
|
356
|
+
bool req_sketch<T, C, A>::is_exact_rank(uint16_t k, uint8_t num_levels, double rank, uint64_t n, bool hra) {
|
|
354
357
|
const unsigned base_cap = k * req_constants::INIT_NUM_SECTIONS;
|
|
355
358
|
if (num_levels == 1 || n <= base_cap) return true;
|
|
356
359
|
const double exact_rank_thresh = static_cast<double>(base_cap) / n;
|
|
357
360
|
return (hra && rank >= 1.0 - exact_rank_thresh) || (!hra && rank <= exact_rank_thresh);
|
|
358
361
|
}
|
|
359
362
|
|
|
360
|
-
template<typename T, typename C, typename
|
|
361
|
-
double req_sketch<T, C,
|
|
363
|
+
template<typename T, typename C, typename A>
|
|
364
|
+
double req_sketch<T, C, A>::relative_rse_factor() {
|
|
362
365
|
return sqrt(0.0512 / req_constants::INIT_NUM_SECTIONS);
|
|
363
366
|
}
|
|
364
367
|
|
|
365
368
|
// implementation for fixed-size arithmetic types (integral and floating point)
|
|
366
|
-
template<typename T, typename C, typename
|
|
369
|
+
template<typename T, typename C, typename A>
|
|
367
370
|
template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
|
368
|
-
size_t req_sketch<T, C,
|
|
371
|
+
size_t req_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
|
|
369
372
|
size_t size = PREAMBLE_SIZE_BYTES;
|
|
370
373
|
if (is_empty()) return size;
|
|
371
374
|
if (is_estimation_mode()) {
|
|
@@ -380,15 +383,15 @@ size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const
|
|
|
380
383
|
}
|
|
381
384
|
|
|
382
385
|
// implementation for all other types
|
|
383
|
-
template<typename T, typename C, typename
|
|
386
|
+
template<typename T, typename C, typename A>
|
|
384
387
|
template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
|
385
|
-
size_t req_sketch<T, C,
|
|
388
|
+
size_t req_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
|
|
386
389
|
size_t size = PREAMBLE_SIZE_BYTES;
|
|
387
390
|
if (is_empty()) return size;
|
|
388
391
|
if (is_estimation_mode()) {
|
|
389
392
|
size += sizeof(n_);
|
|
390
|
-
size += sd.size_of_item(*
|
|
391
|
-
size += sd.size_of_item(*
|
|
393
|
+
size += sd.size_of_item(*min_item_);
|
|
394
|
+
size += sd.size_of_item(*max_item_);
|
|
392
395
|
}
|
|
393
396
|
if (n_ == 1) {
|
|
394
397
|
size += sd.size_of_item(*compactors_[0].begin());
|
|
@@ -398,9 +401,9 @@ size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const
|
|
|
398
401
|
return size;
|
|
399
402
|
}
|
|
400
403
|
|
|
401
|
-
template<typename T, typename C, typename
|
|
404
|
+
template<typename T, typename C, typename A>
|
|
402
405
|
template<typename SerDe>
|
|
403
|
-
void req_sketch<T, C,
|
|
406
|
+
void req_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
404
407
|
const uint8_t preamble_ints = is_estimation_mode() ? 4 : 2;
|
|
405
408
|
write(os, preamble_ints);
|
|
406
409
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
@@ -423,8 +426,8 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
|
|
|
423
426
|
if (is_empty()) return;
|
|
424
427
|
if (is_estimation_mode()) {
|
|
425
428
|
write(os, n_);
|
|
426
|
-
sd.serialize(os,
|
|
427
|
-
sd.serialize(os,
|
|
429
|
+
sd.serialize(os, min_item_, 1);
|
|
430
|
+
sd.serialize(os, max_item_, 1);
|
|
428
431
|
}
|
|
429
432
|
if (raw_items) {
|
|
430
433
|
sd.serialize(os, compactors_[0].begin(), num_raw_items);
|
|
@@ -433,9 +436,9 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
|
|
|
433
436
|
}
|
|
434
437
|
}
|
|
435
438
|
|
|
436
|
-
template<typename T, typename C, typename
|
|
439
|
+
template<typename T, typename C, typename A>
|
|
437
440
|
template<typename SerDe>
|
|
438
|
-
auto req_sketch<T, C,
|
|
441
|
+
auto req_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
|
|
439
442
|
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
|
|
440
443
|
vector_bytes bytes(size, 0, allocator_);
|
|
441
444
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
@@ -463,8 +466,8 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe&
|
|
|
463
466
|
if (!is_empty()) {
|
|
464
467
|
if (is_estimation_mode()) {
|
|
465
468
|
ptr += copy_to_mem(n_, ptr);
|
|
466
|
-
ptr += sd.serialize(ptr, end_ptr - ptr,
|
|
467
|
-
ptr += sd.serialize(ptr, end_ptr - ptr,
|
|
469
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, min_item_, 1);
|
|
470
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, max_item_, 1);
|
|
468
471
|
}
|
|
469
472
|
if (raw_items) {
|
|
470
473
|
ptr += sd.serialize(ptr, end_ptr - ptr, compactors_[0].begin(), num_raw_items);
|
|
@@ -475,14 +478,9 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe&
|
|
|
475
478
|
return bytes;
|
|
476
479
|
}
|
|
477
480
|
|
|
478
|
-
template<typename T, typename C, typename
|
|
479
|
-
req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
|
|
480
|
-
return deserialize(is, S(), allocator);
|
|
481
|
-
}
|
|
482
|
-
|
|
483
|
-
template<typename T, typename C, typename S, typename A>
|
|
481
|
+
template<typename T, typename C, typename A>
|
|
484
482
|
template<typename SerDe>
|
|
485
|
-
req_sketch<T, C,
|
|
483
|
+
req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(std::istream& is, const SerDe& sd, const C& comparator, const A& allocator) {
|
|
486
484
|
const auto preamble_ints = read<uint8_t>(is);
|
|
487
485
|
const auto serial_version = read<uint8_t>(is);
|
|
488
486
|
const auto family_id = read<uint8_t>(is);
|
|
@@ -498,14 +496,14 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
|
498
496
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
499
497
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
500
498
|
const bool hra = flags_byte & (1 << flags::IS_HIGH_RANK);
|
|
501
|
-
if (is_empty) return req_sketch(k, hra, allocator);
|
|
499
|
+
if (is_empty) return req_sketch(k, hra, comparator, allocator);
|
|
502
500
|
|
|
503
501
|
A alloc(allocator);
|
|
504
502
|
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
|
505
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
506
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
507
|
-
std::unique_ptr<T, item_deleter>
|
|
508
|
-
std::unique_ptr<T, item_deleter>
|
|
503
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
504
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
505
|
+
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
|
506
|
+
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
|
509
507
|
|
|
510
508
|
const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
|
|
511
509
|
const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
|
|
@@ -514,19 +512,19 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
|
514
512
|
uint64_t n = 1;
|
|
515
513
|
if (num_levels > 1) {
|
|
516
514
|
n = read<uint64_t>(is);
|
|
517
|
-
sd.deserialize(is,
|
|
515
|
+
sd.deserialize(is, min_item_buffer.get(), 1);
|
|
518
516
|
// serde call did not throw, repackage with destrtuctor
|
|
519
|
-
|
|
520
|
-
sd.deserialize(is,
|
|
517
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
518
|
+
sd.deserialize(is, max_item_buffer.get(), 1);
|
|
521
519
|
// serde call did not throw, repackage with destrtuctor
|
|
522
|
-
|
|
520
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
523
521
|
}
|
|
524
522
|
|
|
525
523
|
if (raw_items) {
|
|
526
|
-
compactors.push_back(Compactor::deserialize(is, sd, allocator, is_level_0_sorted, k, num_raw_items, hra));
|
|
524
|
+
compactors.push_back(Compactor::deserialize(is, sd, comparator, allocator, is_level_0_sorted, k, num_raw_items, hra));
|
|
527
525
|
} else {
|
|
528
526
|
for (size_t i = 0; i < num_levels; ++i) {
|
|
529
|
-
compactors.push_back(Compactor::deserialize(is, sd, allocator, i == 0 ? is_level_0_sorted : true, hra));
|
|
527
|
+
compactors.push_back(Compactor::deserialize(is, sd, comparator, allocator, i == 0 ? is_level_0_sorted : true, hra));
|
|
530
528
|
}
|
|
531
529
|
}
|
|
532
530
|
if (num_levels == 1) {
|
|
@@ -536,29 +534,24 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
|
536
534
|
auto min_it = begin;
|
|
537
535
|
auto max_it = begin;
|
|
538
536
|
for (auto it = begin; it != end; ++it) {
|
|
539
|
-
if (
|
|
540
|
-
if (
|
|
537
|
+
if (comparator(*it, *min_it)) min_it = it;
|
|
538
|
+
if (comparator(*max_it, *it)) max_it = it;
|
|
541
539
|
}
|
|
542
|
-
new (
|
|
540
|
+
new (min_item_buffer.get()) T(*min_it);
|
|
543
541
|
// copy did not throw, repackage with destrtuctor
|
|
544
|
-
|
|
545
|
-
new (
|
|
542
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
543
|
+
new (max_item_buffer.get()) T(*max_it);
|
|
546
544
|
// copy did not throw, repackage with destrtuctor
|
|
547
|
-
|
|
545
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
548
546
|
}
|
|
549
547
|
|
|
550
548
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
551
|
-
return req_sketch(k, hra, n, std::move(
|
|
552
|
-
}
|
|
553
|
-
|
|
554
|
-
template<typename T, typename C, typename S, typename A>
|
|
555
|
-
req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
|
556
|
-
return deserialize(bytes, size, S(), allocator);
|
|
549
|
+
return req_sketch(k, hra, n, std::move(min_item), std::move(max_item), std::move(compactors), comparator);
|
|
557
550
|
}
|
|
558
551
|
|
|
559
|
-
template<typename T, typename C, typename
|
|
552
|
+
template<typename T, typename C, typename A>
|
|
560
553
|
template<typename SerDe>
|
|
561
|
-
req_sketch<T, C,
|
|
554
|
+
req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const C& comparator, const A& allocator) {
|
|
562
555
|
ensure_minimum_memory(size, 8);
|
|
563
556
|
const char* ptr = static_cast<const char*>(bytes);
|
|
564
557
|
const char* end_ptr = static_cast<const char*>(bytes) + size;
|
|
@@ -584,14 +577,14 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
|
584
577
|
|
|
585
578
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
586
579
|
const bool hra = flags_byte & (1 << flags::IS_HIGH_RANK);
|
|
587
|
-
if (is_empty) return req_sketch(k, hra, allocator);
|
|
580
|
+
if (is_empty) return req_sketch(k, hra, comparator, allocator);
|
|
588
581
|
|
|
589
582
|
A alloc(allocator);
|
|
590
583
|
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
|
591
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
592
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
593
|
-
std::unique_ptr<T, item_deleter>
|
|
594
|
-
std::unique_ptr<T, item_deleter>
|
|
584
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
585
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
586
|
+
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
|
587
|
+
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
|
595
588
|
|
|
596
589
|
const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
|
|
597
590
|
const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
|
|
@@ -601,21 +594,21 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
|
601
594
|
if (num_levels > 1) {
|
|
602
595
|
ensure_minimum_memory(end_ptr - ptr, sizeof(n));
|
|
603
596
|
ptr += copy_from_mem(ptr, n);
|
|
604
|
-
ptr += sd.deserialize(ptr, end_ptr - ptr,
|
|
597
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
|
|
605
598
|
// serde call did not throw, repackage with destrtuctor
|
|
606
|
-
|
|
607
|
-
ptr += sd.deserialize(ptr, end_ptr - ptr,
|
|
599
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
600
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
|
|
608
601
|
// serde call did not throw, repackage with destrtuctor
|
|
609
|
-
|
|
602
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
610
603
|
}
|
|
611
604
|
|
|
612
605
|
if (raw_items) {
|
|
613
|
-
auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, allocator, is_level_0_sorted, k, num_raw_items, hra);
|
|
606
|
+
auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, comparator, allocator, is_level_0_sorted, k, num_raw_items, hra);
|
|
614
607
|
compactors.push_back(std::move(pair.first));
|
|
615
608
|
ptr += pair.second;
|
|
616
609
|
} else {
|
|
617
610
|
for (size_t i = 0; i < num_levels; ++i) {
|
|
618
|
-
auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, allocator, i == 0 ? is_level_0_sorted : true, hra);
|
|
611
|
+
auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, comparator, allocator, i == 0 ? is_level_0_sorted : true, hra);
|
|
619
612
|
compactors.push_back(std::move(pair.first));
|
|
620
613
|
ptr += pair.second;
|
|
621
614
|
}
|
|
@@ -627,46 +620,46 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
|
627
620
|
auto min_it = begin;
|
|
628
621
|
auto max_it = begin;
|
|
629
622
|
for (auto it = begin; it != end; ++it) {
|
|
630
|
-
if (
|
|
631
|
-
if (
|
|
623
|
+
if (comparator(*it, *min_it)) min_it = it;
|
|
624
|
+
if (comparator(*max_it, *it)) max_it = it;
|
|
632
625
|
}
|
|
633
|
-
new (
|
|
626
|
+
new (min_item_buffer.get()) T(*min_it);
|
|
634
627
|
// copy did not throw, repackage with destrtuctor
|
|
635
|
-
|
|
636
|
-
new (
|
|
628
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
629
|
+
new (max_item_buffer.get()) T(*max_it);
|
|
637
630
|
// copy did not throw, repackage with destrtuctor
|
|
638
|
-
|
|
631
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
639
632
|
}
|
|
640
633
|
|
|
641
|
-
return req_sketch(k, hra, n, std::move(
|
|
634
|
+
return req_sketch(k, hra, n, std::move(min_item), std::move(max_item), std::move(compactors), comparator);
|
|
642
635
|
}
|
|
643
636
|
|
|
644
|
-
template<typename T, typename C, typename
|
|
645
|
-
void req_sketch<T, C,
|
|
637
|
+
template<typename T, typename C, typename A>
|
|
638
|
+
void req_sketch<T, C, A>::grow() {
|
|
646
639
|
const uint8_t lg_weight = get_num_levels();
|
|
647
|
-
compactors_.push_back(Compactor(hra_, lg_weight, k_, allocator_));
|
|
640
|
+
compactors_.push_back(Compactor(hra_, lg_weight, k_, comparator_, allocator_));
|
|
648
641
|
update_max_nom_size();
|
|
649
642
|
}
|
|
650
643
|
|
|
651
|
-
template<typename T, typename C, typename
|
|
652
|
-
uint8_t req_sketch<T, C,
|
|
644
|
+
template<typename T, typename C, typename A>
|
|
645
|
+
uint8_t req_sketch<T, C, A>::get_num_levels() const {
|
|
653
646
|
return static_cast<uint8_t>(compactors_.size());
|
|
654
647
|
}
|
|
655
648
|
|
|
656
|
-
template<typename T, typename C, typename
|
|
657
|
-
void req_sketch<T, C,
|
|
649
|
+
template<typename T, typename C, typename A>
|
|
650
|
+
void req_sketch<T, C, A>::update_max_nom_size() {
|
|
658
651
|
max_nom_size_ = 0;
|
|
659
652
|
for (const auto& compactor: compactors_) max_nom_size_ += compactor.get_nom_capacity();
|
|
660
653
|
}
|
|
661
654
|
|
|
662
|
-
template<typename T, typename C, typename
|
|
663
|
-
void req_sketch<T, C,
|
|
655
|
+
template<typename T, typename C, typename A>
|
|
656
|
+
void req_sketch<T, C, A>::update_num_retained() {
|
|
664
657
|
num_retained_ = 0;
|
|
665
658
|
for (const auto& compactor: compactors_) num_retained_ += compactor.get_num_items();
|
|
666
659
|
}
|
|
667
660
|
|
|
668
|
-
template<typename T, typename C, typename
|
|
669
|
-
void req_sketch<T, C,
|
|
661
|
+
template<typename T, typename C, typename A>
|
|
662
|
+
void req_sketch<T, C, A>::compress() {
|
|
670
663
|
for (size_t h = 0; h < compactors_.size(); ++h) {
|
|
671
664
|
if (compactors_[h].get_num_items() >= compactors_[h].get_nom_capacity()) {
|
|
672
665
|
if (h == 0) compactors_[0].sort();
|
|
@@ -681,8 +674,8 @@ void req_sketch<T, C, S, A>::compress() {
|
|
|
681
674
|
}
|
|
682
675
|
}
|
|
683
676
|
|
|
684
|
-
template<typename T, typename C, typename
|
|
685
|
-
string<A> req_sketch<T, C,
|
|
677
|
+
template<typename T, typename C, typename A>
|
|
678
|
+
string<A> req_sketch<T, C, A>::to_string(bool print_levels, bool print_items) const {
|
|
686
679
|
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
687
680
|
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
688
681
|
std::ostringstream os;
|
|
@@ -697,8 +690,8 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
|
|
|
697
690
|
os << " Retained items : " << num_retained_ << std::endl;
|
|
698
691
|
os << " Capacity items : " << max_nom_size_ << std::endl;
|
|
699
692
|
if (!is_empty()) {
|
|
700
|
-
os << " Min
|
|
701
|
-
os << " Max
|
|
693
|
+
os << " Min item : " << *min_item_ << std::endl;
|
|
694
|
+
os << " Max item : " << *max_item_ << std::endl;
|
|
702
695
|
}
|
|
703
696
|
os << "### End sketch summary" << std::endl;
|
|
704
697
|
|
|
@@ -728,8 +721,8 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
|
|
|
728
721
|
return string<A>(os.str().c_str(), allocator_);
|
|
729
722
|
}
|
|
730
723
|
|
|
731
|
-
template<typename T, typename C, typename
|
|
732
|
-
class req_sketch<T, C,
|
|
724
|
+
template<typename T, typename C, typename A>
|
|
725
|
+
class req_sketch<T, C, A>::item_deleter {
|
|
733
726
|
public:
|
|
734
727
|
item_deleter(const A& allocator): allocator_(allocator) {}
|
|
735
728
|
void operator() (T* ptr) {
|
|
@@ -742,8 +735,11 @@ class req_sketch<T, C, S, A>::item_deleter {
|
|
|
742
735
|
A allocator_;
|
|
743
736
|
};
|
|
744
737
|
|
|
745
|
-
template<typename T, typename C, typename
|
|
746
|
-
req_sketch<T, C,
|
|
738
|
+
template<typename T, typename C, typename A>
|
|
739
|
+
req_sketch<T, C, A>::req_sketch(uint16_t k, bool hra, uint64_t n,
|
|
740
|
+
std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
|
|
741
|
+
std::vector<Compactor, AllocCompactor>&& compactors, const C& comparator):
|
|
742
|
+
comparator_(comparator),
|
|
747
743
|
allocator_(compactors.get_allocator()),
|
|
748
744
|
k_(k),
|
|
749
745
|
hra_(hra),
|
|
@@ -751,15 +747,16 @@ max_nom_size_(0),
|
|
|
751
747
|
num_retained_(0),
|
|
752
748
|
n_(n),
|
|
753
749
|
compactors_(std::move(compactors)),
|
|
754
|
-
|
|
755
|
-
|
|
750
|
+
min_item_(min_item.release()),
|
|
751
|
+
max_item_(max_item.release()),
|
|
752
|
+
sorted_view_(nullptr)
|
|
756
753
|
{
|
|
757
754
|
update_max_nom_size();
|
|
758
755
|
update_num_retained();
|
|
759
756
|
}
|
|
760
757
|
|
|
761
|
-
template<typename T, typename C, typename
|
|
762
|
-
void req_sketch<T, C,
|
|
758
|
+
template<typename T, typename C, typename A>
|
|
759
|
+
void req_sketch<T, C, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels) {
|
|
763
760
|
const uint8_t expected_preamble_ints = num_levels > 1 ? 4 : 2;
|
|
764
761
|
if (preamble_ints != expected_preamble_ints) {
|
|
765
762
|
throw std::invalid_argument("Possible corruption: preamble ints must be "
|
|
@@ -767,8 +764,8 @@ void req_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t
|
|
|
767
764
|
}
|
|
768
765
|
}
|
|
769
766
|
|
|
770
|
-
template<typename T, typename C, typename
|
|
771
|
-
void req_sketch<T, C,
|
|
767
|
+
template<typename T, typename C, typename A>
|
|
768
|
+
void req_sketch<T, C, A>::check_serial_version(uint8_t serial_version) {
|
|
772
769
|
if (serial_version != SERIAL_VERSION) {
|
|
773
770
|
throw std::invalid_argument("Possible corruption: serial version mismatch: expected "
|
|
774
771
|
+ std::to_string(SERIAL_VERSION)
|
|
@@ -776,35 +773,53 @@ void req_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
|
|
|
776
773
|
}
|
|
777
774
|
}
|
|
778
775
|
|
|
779
|
-
template<typename T, typename C, typename
|
|
780
|
-
void req_sketch<T, C,
|
|
776
|
+
template<typename T, typename C, typename A>
|
|
777
|
+
void req_sketch<T, C, A>::check_family_id(uint8_t family_id) {
|
|
781
778
|
if (family_id != FAMILY) {
|
|
782
779
|
throw std::invalid_argument("Possible corruption: family mismatch: expected "
|
|
783
780
|
+ std::to_string(FAMILY) + ", got " + std::to_string(family_id));
|
|
784
781
|
}
|
|
785
782
|
}
|
|
786
783
|
|
|
787
|
-
template<typename T, typename C, typename
|
|
788
|
-
auto req_sketch<T, C,
|
|
784
|
+
template<typename T, typename C, typename A>
|
|
785
|
+
auto req_sketch<T, C, A>::begin() const -> const_iterator {
|
|
789
786
|
return const_iterator(compactors_.begin(), compactors_.end());
|
|
790
787
|
}
|
|
791
788
|
|
|
792
|
-
template<typename T, typename C, typename
|
|
793
|
-
auto req_sketch<T, C,
|
|
789
|
+
template<typename T, typename C, typename A>
|
|
790
|
+
auto req_sketch<T, C, A>::end() const -> const_iterator {
|
|
794
791
|
return const_iterator(compactors_.end(), compactors_.end());
|
|
795
792
|
}
|
|
796
793
|
|
|
794
|
+
template<typename T, typename C, typename A>
|
|
795
|
+
void req_sketch<T, C, A>::setup_sorted_view() const {
|
|
796
|
+
if (sorted_view_ == nullptr) {
|
|
797
|
+
using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
|
|
798
|
+
sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
|
|
799
|
+
}
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
template<typename T, typename C, typename A>
|
|
803
|
+
void req_sketch<T, C, A>::reset_sorted_view() {
|
|
804
|
+
if (sorted_view_ != nullptr) {
|
|
805
|
+
sorted_view_->~quantiles_sorted_view();
|
|
806
|
+
using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
|
|
807
|
+
AllocSortedView(allocator_).deallocate(sorted_view_, 1);
|
|
808
|
+
sorted_view_ = nullptr;
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
|
|
797
812
|
// iterator
|
|
798
813
|
|
|
799
|
-
template<typename T, typename C, typename
|
|
800
|
-
req_sketch<T, C,
|
|
814
|
+
template<typename T, typename C, typename A>
|
|
815
|
+
req_sketch<T, C, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
|
|
801
816
|
levels_it_(begin),
|
|
802
817
|
levels_end_(end),
|
|
803
818
|
compactor_it_(begin == end ? nullptr : (*levels_it_).begin())
|
|
804
819
|
{}
|
|
805
820
|
|
|
806
|
-
template<typename T, typename C, typename
|
|
807
|
-
auto req_sketch<T, C,
|
|
821
|
+
template<typename T, typename C, typename A>
|
|
822
|
+
auto req_sketch<T, C, A>::const_iterator::operator++() -> const_iterator& {
|
|
808
823
|
++compactor_it_;
|
|
809
824
|
if (compactor_it_ == (*levels_it_).end()) {
|
|
810
825
|
++levels_it_;
|
|
@@ -813,28 +828,33 @@ auto req_sketch<T, C, S, A>::const_iterator::operator++() -> const_iterator& {
|
|
|
813
828
|
return *this;
|
|
814
829
|
}
|
|
815
830
|
|
|
816
|
-
template<typename T, typename C, typename
|
|
817
|
-
auto req_sketch<T, C,
|
|
831
|
+
template<typename T, typename C, typename A>
|
|
832
|
+
auto req_sketch<T, C, A>::const_iterator::operator++(int) -> const_iterator& {
|
|
818
833
|
const_iterator tmp(*this);
|
|
819
834
|
operator++();
|
|
820
835
|
return tmp;
|
|
821
836
|
}
|
|
822
837
|
|
|
823
|
-
template<typename T, typename C, typename
|
|
824
|
-
bool req_sketch<T, C,
|
|
838
|
+
template<typename T, typename C, typename A>
|
|
839
|
+
bool req_sketch<T, C, A>::const_iterator::operator==(const const_iterator& other) const {
|
|
825
840
|
if (levels_it_ != other.levels_it_) return false;
|
|
826
841
|
if (levels_it_ == levels_end_) return true;
|
|
827
842
|
return compactor_it_ == other.compactor_it_;
|
|
828
843
|
}
|
|
829
844
|
|
|
830
|
-
template<typename T, typename C, typename
|
|
831
|
-
bool req_sketch<T, C,
|
|
845
|
+
template<typename T, typename C, typename A>
|
|
846
|
+
bool req_sketch<T, C, A>::const_iterator::operator!=(const const_iterator& other) const {
|
|
832
847
|
return !operator==(other);
|
|
833
848
|
}
|
|
834
849
|
|
|
835
|
-
template<typename T, typename C, typename
|
|
836
|
-
|
|
837
|
-
return
|
|
850
|
+
template<typename T, typename C, typename A>
|
|
851
|
+
auto req_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
|
|
852
|
+
return value_type(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
|
|
853
|
+
}
|
|
854
|
+
|
|
855
|
+
template<typename T, typename C, typename A>
|
|
856
|
+
auto req_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
|
|
857
|
+
return **this;
|
|
838
858
|
}
|
|
839
859
|
|
|
840
860
|
} /* namespace datasketches */
|