datasketches 0.2.6 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE +4 -6
- data/NOTICE +6 -5
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/LICENSE +4 -6
- data/vendor/datasketches-cpp/MANIFEST.in +21 -4
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/{test/test_runner.cpp → include/version.hpp.in} +15 -8
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +37 -7
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +22 -1
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +1 -1
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +1 -1
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +17 -10
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +55 -42
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +4 -4
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +27 -27
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +197 -233
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +42 -32
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +17 -13
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/README.md +1 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +19 -1
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +20 -19
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +241 -233
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +27 -27
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +117 -104
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +3 -3
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +5 -5
- data/vendor/datasketches-cpp/setup.py +14 -3
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +3 -2
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +41 -35
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +27 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -7
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
- data/vendor/datasketches-cpp/common/test/catch.hpp +0 -17618
@@ -25,8 +25,9 @@
|
|
25
25
|
|
26
26
|
namespace datasketches {
|
27
27
|
|
28
|
-
template<typename T, typename C, typename
|
29
|
-
req_sketch<T, C,
|
28
|
+
template<typename T, typename C, typename A>
|
29
|
+
req_sketch<T, C, A>::req_sketch(uint16_t k, bool hra, const C& comparator, const A& allocator):
|
30
|
+
comparator_(comparator),
|
30
31
|
allocator_(allocator),
|
31
32
|
k_(std::max<uint8_t>(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
|
32
33
|
hra_(hra),
|
@@ -34,26 +35,29 @@ max_nom_size_(0),
|
|
34
35
|
num_retained_(0),
|
35
36
|
n_(0),
|
36
37
|
compactors_(allocator),
|
37
|
-
|
38
|
-
|
38
|
+
min_item_(nullptr),
|
39
|
+
max_item_(nullptr),
|
40
|
+
sorted_view_(nullptr)
|
39
41
|
{
|
40
42
|
grow();
|
41
43
|
}
|
42
44
|
|
43
|
-
template<typename T, typename C, typename
|
44
|
-
req_sketch<T, C,
|
45
|
-
if (
|
46
|
-
|
47
|
-
allocator_.deallocate(
|
45
|
+
template<typename T, typename C, typename A>
|
46
|
+
req_sketch<T, C, A>::~req_sketch() {
|
47
|
+
if (min_item_ != nullptr) {
|
48
|
+
min_item_->~T();
|
49
|
+
allocator_.deallocate(min_item_, 1);
|
48
50
|
}
|
49
|
-
if (
|
50
|
-
|
51
|
-
allocator_.deallocate(
|
51
|
+
if (max_item_ != nullptr) {
|
52
|
+
max_item_->~T();
|
53
|
+
allocator_.deallocate(max_item_, 1);
|
52
54
|
}
|
55
|
+
reset_sorted_view();
|
53
56
|
}
|
54
57
|
|
55
|
-
template<typename T, typename C, typename
|
56
|
-
req_sketch<T, C,
|
58
|
+
template<typename T, typename C, typename A>
|
59
|
+
req_sketch<T, C, A>::req_sketch(const req_sketch& other):
|
60
|
+
comparator_(other.comparator_),
|
57
61
|
allocator_(other.allocator_),
|
58
62
|
k_(other.k_),
|
59
63
|
hra_(other.hra_),
|
@@ -61,15 +65,17 @@ max_nom_size_(other.max_nom_size_),
|
|
61
65
|
num_retained_(other.num_retained_),
|
62
66
|
n_(other.n_),
|
63
67
|
compactors_(other.compactors_),
|
64
|
-
|
65
|
-
|
68
|
+
min_item_(nullptr),
|
69
|
+
max_item_(nullptr),
|
70
|
+
sorted_view_(nullptr)
|
66
71
|
{
|
67
|
-
if (other.
|
68
|
-
if (other.
|
72
|
+
if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
|
73
|
+
if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
|
69
74
|
}
|
70
75
|
|
71
|
-
template<typename T, typename C, typename
|
72
|
-
req_sketch<T, C,
|
76
|
+
template<typename T, typename C, typename A>
|
77
|
+
req_sketch<T, C, A>::req_sketch(req_sketch&& other) noexcept :
|
78
|
+
comparator_(std::move(other.comparator_)),
|
73
79
|
allocator_(std::move(other.allocator_)),
|
74
80
|
k_(other.k_),
|
75
81
|
hra_(other.hra_),
|
@@ -77,16 +83,18 @@ max_nom_size_(other.max_nom_size_),
|
|
77
83
|
num_retained_(other.num_retained_),
|
78
84
|
n_(other.n_),
|
79
85
|
compactors_(std::move(other.compactors_)),
|
80
|
-
|
81
|
-
|
86
|
+
min_item_(other.min_item_),
|
87
|
+
max_item_(other.max_item_),
|
88
|
+
sorted_view_(nullptr)
|
82
89
|
{
|
83
|
-
other.
|
84
|
-
other.
|
90
|
+
other.min_item_ = nullptr;
|
91
|
+
other.max_item_ = nullptr;
|
85
92
|
}
|
86
93
|
|
87
|
-
template<typename T, typename C, typename
|
88
|
-
req_sketch<T, C,
|
94
|
+
template<typename T, typename C, typename A>
|
95
|
+
req_sketch<T, C, A>& req_sketch<T, C, A>::operator=(const req_sketch& other) {
|
89
96
|
req_sketch copy(other);
|
97
|
+
std::swap(comparator_, copy.comparator_);
|
90
98
|
std::swap(allocator_, copy.allocator_);
|
91
99
|
std::swap(k_, copy.k_);
|
92
100
|
std::swap(hra_, copy.hra_);
|
@@ -94,13 +102,15 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(const req_sketch& othe
|
|
94
102
|
std::swap(num_retained_, copy.num_retained_);
|
95
103
|
std::swap(n_, copy.n_);
|
96
104
|
std::swap(compactors_, copy.compactors_);
|
97
|
-
std::swap(
|
98
|
-
std::swap(
|
105
|
+
std::swap(min_item_, copy.min_item_);
|
106
|
+
std::swap(max_item_, copy.max_item_);
|
107
|
+
reset_sorted_view();
|
99
108
|
return *this;
|
100
109
|
}
|
101
110
|
|
102
|
-
template<typename T, typename C, typename
|
103
|
-
req_sketch<T, C,
|
111
|
+
template<typename T, typename C, typename A>
|
112
|
+
req_sketch<T, C, A>& req_sketch<T, C, A>::operator=(req_sketch&& other) {
|
113
|
+
std::swap(comparator_, other.comparator_);
|
104
114
|
std::swap(allocator_, other.allocator_);
|
105
115
|
std::swap(k_, other.k_);
|
106
116
|
std::swap(hra_, other.hra_);
|
@@ -108,14 +118,16 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
|
|
108
118
|
std::swap(num_retained_, other.num_retained_);
|
109
119
|
std::swap(n_, other.n_);
|
110
120
|
std::swap(compactors_, other.compactors_);
|
111
|
-
std::swap(
|
112
|
-
std::swap(
|
121
|
+
std::swap(min_item_, other.min_item_);
|
122
|
+
std::swap(max_item_, other.max_item_);
|
123
|
+
reset_sorted_view();
|
113
124
|
return *this;
|
114
125
|
}
|
115
126
|
|
116
|
-
template<typename T, typename C, typename
|
117
|
-
template<typename TT, typename CC, typename
|
118
|
-
req_sketch<T, C,
|
127
|
+
template<typename T, typename C, typename A>
|
128
|
+
template<typename TT, typename CC, typename AA>
|
129
|
+
req_sketch<T, C, A>::req_sketch(const req_sketch<TT, CC, AA>& other, const C& comparator, const A& allocator):
|
130
|
+
comparator_(comparator),
|
119
131
|
allocator_(allocator),
|
120
132
|
k_(other.k_),
|
121
133
|
hra_(other.hra_),
|
@@ -123,8 +135,9 @@ max_nom_size_(other.max_nom_size_),
|
|
123
135
|
num_retained_(other.num_retained_),
|
124
136
|
n_(other.n_),
|
125
137
|
compactors_(allocator),
|
126
|
-
|
127
|
-
|
138
|
+
min_item_(nullptr),
|
139
|
+
max_item_(nullptr),
|
140
|
+
sorted_view_(nullptr)
|
128
141
|
{
|
129
142
|
static_assert(
|
130
143
|
std::is_constructible<T, TT>::value,
|
@@ -132,72 +145,73 @@ max_value_(nullptr)
|
|
132
145
|
);
|
133
146
|
compactors_.reserve(other.compactors_.size());
|
134
147
|
for (const auto& compactor: other.compactors_) {
|
135
|
-
compactors_.push_back(req_compactor<T, C, A>(compactor, allocator_));
|
148
|
+
compactors_.push_back(req_compactor<T, C, A>(compactor, comparator_, allocator_));
|
136
149
|
}
|
137
150
|
if (!other.is_empty()) {
|
138
|
-
|
139
|
-
|
151
|
+
min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
|
152
|
+
max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
|
140
153
|
}
|
141
154
|
}
|
142
155
|
|
143
|
-
template<typename T, typename C, typename
|
144
|
-
uint16_t req_sketch<T, C,
|
156
|
+
template<typename T, typename C, typename A>
|
157
|
+
uint16_t req_sketch<T, C, A>::get_k() const {
|
145
158
|
return k_;
|
146
159
|
}
|
147
160
|
|
148
|
-
template<typename T, typename C, typename
|
149
|
-
bool req_sketch<T, C,
|
161
|
+
template<typename T, typename C, typename A>
|
162
|
+
bool req_sketch<T, C, A>::is_HRA() const {
|
150
163
|
return hra_;
|
151
164
|
}
|
152
165
|
|
153
|
-
template<typename T, typename C, typename
|
154
|
-
bool req_sketch<T, C,
|
166
|
+
template<typename T, typename C, typename A>
|
167
|
+
bool req_sketch<T, C, A>::is_empty() const {
|
155
168
|
return n_ == 0;
|
156
169
|
}
|
157
170
|
|
158
|
-
template<typename T, typename C, typename
|
159
|
-
uint64_t req_sketch<T, C,
|
171
|
+
template<typename T, typename C, typename A>
|
172
|
+
uint64_t req_sketch<T, C, A>::get_n() const {
|
160
173
|
return n_;
|
161
174
|
}
|
162
175
|
|
163
|
-
template<typename T, typename C, typename
|
164
|
-
uint32_t req_sketch<T, C,
|
176
|
+
template<typename T, typename C, typename A>
|
177
|
+
uint32_t req_sketch<T, C, A>::get_num_retained() const {
|
165
178
|
return num_retained_;
|
166
179
|
}
|
167
180
|
|
168
|
-
template<typename T, typename C, typename
|
169
|
-
bool req_sketch<T, C,
|
181
|
+
template<typename T, typename C, typename A>
|
182
|
+
bool req_sketch<T, C, A>::is_estimation_mode() const {
|
170
183
|
return compactors_.size() > 1;
|
171
184
|
}
|
172
185
|
|
173
|
-
template<typename T, typename C, typename
|
186
|
+
template<typename T, typename C, typename A>
|
174
187
|
template<typename FwdT>
|
175
|
-
void req_sketch<T, C,
|
176
|
-
if (!
|
188
|
+
void req_sketch<T, C, A>::update(FwdT&& item) {
|
189
|
+
if (!check_update_item(item)) { return; }
|
177
190
|
if (is_empty()) {
|
178
|
-
|
179
|
-
|
191
|
+
min_item_ = new (allocator_.allocate(1)) T(item);
|
192
|
+
max_item_ = new (allocator_.allocate(1)) T(item);
|
180
193
|
} else {
|
181
|
-
if (
|
182
|
-
if (
|
194
|
+
if (comparator_(item, *min_item_)) *min_item_ = item;
|
195
|
+
if (comparator_(*max_item_, item)) *max_item_ = item;
|
183
196
|
}
|
184
197
|
compactors_[0].append(std::forward<FwdT>(item));
|
185
198
|
++num_retained_;
|
186
199
|
++n_;
|
187
200
|
if (num_retained_ == max_nom_size_) compress();
|
201
|
+
reset_sorted_view();
|
188
202
|
}
|
189
203
|
|
190
|
-
template<typename T, typename C, typename
|
204
|
+
template<typename T, typename C, typename A>
|
191
205
|
template<typename FwdSk>
|
192
|
-
void req_sketch<T, C,
|
206
|
+
void req_sketch<T, C, A>::merge(FwdSk&& other) {
|
193
207
|
if (is_HRA() != other.is_HRA()) throw std::invalid_argument("merging HRA and LRA is not valid");
|
194
208
|
if (other.is_empty()) return;
|
195
209
|
if (is_empty()) {
|
196
|
-
|
197
|
-
|
210
|
+
min_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_item_));
|
211
|
+
max_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_item_));
|
198
212
|
} else {
|
199
|
-
if (
|
200
|
-
if (
|
213
|
+
if (comparator_(*other.min_item_, *min_item_)) *min_item_ = conditional_forward<FwdSk>(*other.min_item_);
|
214
|
+
if (comparator_(*max_item_, *other.max_item_)) *max_item_ = conditional_forward<FwdSk>(*other.max_item_);
|
201
215
|
}
|
202
216
|
// grow until this has at least as many compactors as other
|
203
217
|
while (get_num_levels() < other.get_num_levels()) grow();
|
@@ -209,128 +223,117 @@ void req_sketch<T, C, S, A>::merge(FwdSk&& other) {
|
|
209
223
|
update_max_nom_size();
|
210
224
|
update_num_retained();
|
211
225
|
if (num_retained_ >= max_nom_size_) compress();
|
226
|
+
reset_sorted_view();
|
212
227
|
}
|
213
228
|
|
214
|
-
template<typename T, typename C, typename
|
215
|
-
const T& req_sketch<T, C,
|
216
|
-
if (is_empty())
|
217
|
-
return *
|
229
|
+
template<typename T, typename C, typename A>
|
230
|
+
const T& req_sketch<T, C, A>::get_min_item() const {
|
231
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
232
|
+
return *min_item_;
|
218
233
|
}
|
219
234
|
|
220
|
-
template<typename T, typename C, typename
|
221
|
-
const T& req_sketch<T, C,
|
222
|
-
if (is_empty())
|
223
|
-
return *
|
235
|
+
template<typename T, typename C, typename A>
|
236
|
+
const T& req_sketch<T, C, A>::get_max_item() const {
|
237
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
238
|
+
return *max_item_;
|
224
239
|
}
|
225
240
|
|
226
|
-
template<typename T, typename C, typename
|
227
|
-
C req_sketch<T, C,
|
228
|
-
return
|
241
|
+
template<typename T, typename C, typename A>
|
242
|
+
C req_sketch<T, C, A>::get_comparator() const {
|
243
|
+
return comparator_;
|
229
244
|
}
|
230
245
|
|
231
|
-
template<typename T, typename C, typename
|
232
|
-
|
233
|
-
|
246
|
+
template<typename T, typename C, typename A>
|
247
|
+
A req_sketch<T, C, A>::get_allocator() const {
|
248
|
+
return allocator_;
|
249
|
+
}
|
250
|
+
|
251
|
+
template<typename T, typename C, typename A>
|
252
|
+
double req_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
|
253
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
234
254
|
uint64_t weight = 0;
|
235
255
|
for (const auto& compactor: compactors_) {
|
236
|
-
weight += compactor.
|
256
|
+
weight += compactor.compute_weight(item, inclusive);
|
237
257
|
}
|
238
258
|
return static_cast<double>(weight) / n_;
|
239
259
|
}
|
240
260
|
|
241
|
-
template<typename T, typename C, typename
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
for (uint32_t i = size; i > 0; --i) {
|
247
|
-
buckets[i] -= buckets[i - 1];
|
248
|
-
}
|
249
|
-
return buckets;
|
261
|
+
template<typename T, typename C, typename A>
|
262
|
+
auto req_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
|
263
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
264
|
+
setup_sorted_view();
|
265
|
+
return sorted_view_->get_PMF(split_points, size, inclusive);
|
250
266
|
}
|
251
267
|
|
252
|
-
template<typename T, typename C, typename
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
check_split_points(split_points, size);
|
258
|
-
buckets.reserve(size + 1);
|
259
|
-
for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank<inclusive>(split_points[i]));
|
260
|
-
buckets.push_back(1);
|
261
|
-
return buckets;
|
268
|
+
template<typename T, typename C, typename A>
|
269
|
+
auto req_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
|
270
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
271
|
+
setup_sorted_view();
|
272
|
+
return sorted_view_->get_CDF(split_points, size, inclusive);
|
262
273
|
}
|
263
274
|
|
264
|
-
template<typename T, typename C, typename
|
265
|
-
|
266
|
-
|
267
|
-
if (is_empty()) return get_invalid_value();
|
268
|
-
if (rank == 0.0) return *min_value_;
|
269
|
-
if (rank == 1.0) return *max_value_;
|
275
|
+
template<typename T, typename C, typename A>
|
276
|
+
auto req_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
|
277
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
270
278
|
if ((rank < 0.0) || (rank > 1.0)) {
|
271
|
-
throw std::invalid_argument("
|
279
|
+
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
|
272
280
|
}
|
273
281
|
// possible side-effect of sorting level zero
|
274
|
-
|
282
|
+
setup_sorted_view();
|
283
|
+
return sorted_view_->get_quantile(rank, inclusive);
|
275
284
|
}
|
276
285
|
|
277
|
-
template<typename T, typename C, typename
|
278
|
-
|
279
|
-
|
286
|
+
template<typename T, typename C, typename A>
|
287
|
+
std::vector<T, A> req_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
|
288
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
280
289
|
std::vector<T, A> quantiles(allocator_);
|
281
|
-
if (is_empty()) return quantiles;
|
282
290
|
quantiles.reserve(size);
|
283
291
|
|
284
292
|
// possible side-effect of sorting level zero
|
285
|
-
|
293
|
+
setup_sorted_view();
|
286
294
|
|
287
295
|
for (uint32_t i = 0; i < size; ++i) {
|
288
296
|
const double rank = ranks[i];
|
289
297
|
if ((rank < 0.0) || (rank > 1.0)) {
|
290
|
-
throw std::invalid_argument("rank cannot be less than
|
291
|
-
}
|
292
|
-
if (rank == 0.0) quantiles.push_back(*min_value_);
|
293
|
-
else if (rank == 1.0) quantiles.push_back(*max_value_);
|
294
|
-
else {
|
295
|
-
quantiles.push_back(view.get_quantile(rank));
|
298
|
+
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
|
296
299
|
}
|
300
|
+
quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
|
297
301
|
}
|
298
302
|
return quantiles;
|
299
303
|
}
|
300
304
|
|
301
|
-
template<typename T, typename C, typename
|
302
|
-
|
303
|
-
quantile_sketch_sorted_view<T, C, A> req_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
|
305
|
+
template<typename T, typename C, typename A>
|
306
|
+
quantiles_sorted_view<T, C, A> req_sketch<T, C, A>::get_sorted_view() const {
|
304
307
|
if (!compactors_[0].is_sorted()) {
|
305
308
|
const_cast<Compactor&>(compactors_[0]).sort(); // allow this side effect
|
306
309
|
}
|
307
|
-
|
310
|
+
quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
|
308
311
|
|
309
312
|
for (auto& compactor: compactors_) {
|
310
313
|
view.add(compactor.begin(), compactor.end(), 1 << compactor.get_lg_weight());
|
311
314
|
}
|
312
315
|
|
313
|
-
|
316
|
+
view.convert_to_cummulative();
|
314
317
|
return view;
|
315
318
|
}
|
316
319
|
|
317
|
-
template<typename T, typename C, typename
|
318
|
-
double req_sketch<T, C,
|
320
|
+
template<typename T, typename C, typename A>
|
321
|
+
double req_sketch<T, C, A>::get_rank_lower_bound(double rank, uint8_t num_std_dev) const {
|
319
322
|
return get_rank_lb(get_k(), get_num_levels(), rank, num_std_dev, get_n(), hra_);
|
320
323
|
}
|
321
324
|
|
322
|
-
template<typename T, typename C, typename
|
323
|
-
double req_sketch<T, C,
|
325
|
+
template<typename T, typename C, typename A>
|
326
|
+
double req_sketch<T, C, A>::get_rank_upper_bound(double rank, uint8_t num_std_dev) const {
|
324
327
|
return get_rank_ub(get_k(), get_num_levels(), rank, num_std_dev, get_n(), hra_);
|
325
328
|
}
|
326
329
|
|
327
|
-
template<typename T, typename C, typename
|
328
|
-
double req_sketch<T, C,
|
330
|
+
template<typename T, typename C, typename A>
|
331
|
+
double req_sketch<T, C, A>::get_RSE(uint16_t k, double rank, bool hra, uint64_t n) {
|
329
332
|
return get_rank_lb(k, 2, rank, 1, n, hra);
|
330
333
|
}
|
331
334
|
|
332
|
-
template<typename T, typename C, typename
|
333
|
-
double req_sketch<T, C,
|
335
|
+
template<typename T, typename C, typename A>
|
336
|
+
double req_sketch<T, C, A>::get_rank_lb(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
|
334
337
|
if (is_exact_rank(k, num_levels, rank, n, hra)) return rank;
|
335
338
|
const double relative = relative_rse_factor() / k * (hra ? 1.0 - rank : rank);
|
336
339
|
const double fixed = FIXED_RSE_FACTOR / k;
|
@@ -339,8 +342,8 @@ double req_sketch<T, C, S, A>::get_rank_lb(uint16_t k, uint8_t num_levels, doubl
|
|
339
342
|
return std::max(lb_rel, lb_fix);
|
340
343
|
}
|
341
344
|
|
342
|
-
template<typename T, typename C, typename
|
343
|
-
double req_sketch<T, C,
|
345
|
+
template<typename T, typename C, typename A>
|
346
|
+
double req_sketch<T, C, A>::get_rank_ub(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra) {
|
344
347
|
if (is_exact_rank(k, num_levels, rank, n, hra)) return rank;
|
345
348
|
const double relative = relative_rse_factor() / k * (hra ? 1.0 - rank : rank);
|
346
349
|
const double fixed = FIXED_RSE_FACTOR / k;
|
@@ -349,23 +352,23 @@ double req_sketch<T, C, S, A>::get_rank_ub(uint16_t k, uint8_t num_levels, doubl
|
|
349
352
|
return std::min(ub_rel, ub_fix);
|
350
353
|
}
|
351
354
|
|
352
|
-
template<typename T, typename C, typename
|
353
|
-
bool req_sketch<T, C,
|
355
|
+
template<typename T, typename C, typename A>
|
356
|
+
bool req_sketch<T, C, A>::is_exact_rank(uint16_t k, uint8_t num_levels, double rank, uint64_t n, bool hra) {
|
354
357
|
const unsigned base_cap = k * req_constants::INIT_NUM_SECTIONS;
|
355
358
|
if (num_levels == 1 || n <= base_cap) return true;
|
356
359
|
const double exact_rank_thresh = static_cast<double>(base_cap) / n;
|
357
360
|
return (hra && rank >= 1.0 - exact_rank_thresh) || (!hra && rank <= exact_rank_thresh);
|
358
361
|
}
|
359
362
|
|
360
|
-
template<typename T, typename C, typename
|
361
|
-
double req_sketch<T, C,
|
363
|
+
template<typename T, typename C, typename A>
|
364
|
+
double req_sketch<T, C, A>::relative_rse_factor() {
|
362
365
|
return sqrt(0.0512 / req_constants::INIT_NUM_SECTIONS);
|
363
366
|
}
|
364
367
|
|
365
368
|
// implementation for fixed-size arithmetic types (integral and floating point)
|
366
|
-
template<typename T, typename C, typename
|
369
|
+
template<typename T, typename C, typename A>
|
367
370
|
template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
368
|
-
size_t req_sketch<T, C,
|
371
|
+
size_t req_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
|
369
372
|
size_t size = PREAMBLE_SIZE_BYTES;
|
370
373
|
if (is_empty()) return size;
|
371
374
|
if (is_estimation_mode()) {
|
@@ -380,15 +383,15 @@ size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const
|
|
380
383
|
}
|
381
384
|
|
382
385
|
// implementation for all other types
|
383
|
-
template<typename T, typename C, typename
|
386
|
+
template<typename T, typename C, typename A>
|
384
387
|
template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
385
|
-
size_t req_sketch<T, C,
|
388
|
+
size_t req_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
|
386
389
|
size_t size = PREAMBLE_SIZE_BYTES;
|
387
390
|
if (is_empty()) return size;
|
388
391
|
if (is_estimation_mode()) {
|
389
392
|
size += sizeof(n_);
|
390
|
-
size += sd.size_of_item(*
|
391
|
-
size += sd.size_of_item(*
|
393
|
+
size += sd.size_of_item(*min_item_);
|
394
|
+
size += sd.size_of_item(*max_item_);
|
392
395
|
}
|
393
396
|
if (n_ == 1) {
|
394
397
|
size += sd.size_of_item(*compactors_[0].begin());
|
@@ -398,9 +401,9 @@ size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const
|
|
398
401
|
return size;
|
399
402
|
}
|
400
403
|
|
401
|
-
template<typename T, typename C, typename
|
404
|
+
template<typename T, typename C, typename A>
|
402
405
|
template<typename SerDe>
|
403
|
-
void req_sketch<T, C,
|
406
|
+
void req_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
404
407
|
const uint8_t preamble_ints = is_estimation_mode() ? 4 : 2;
|
405
408
|
write(os, preamble_ints);
|
406
409
|
const uint8_t serial_version = SERIAL_VERSION;
|
@@ -423,8 +426,8 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
|
|
423
426
|
if (is_empty()) return;
|
424
427
|
if (is_estimation_mode()) {
|
425
428
|
write(os, n_);
|
426
|
-
sd.serialize(os,
|
427
|
-
sd.serialize(os,
|
429
|
+
sd.serialize(os, min_item_, 1);
|
430
|
+
sd.serialize(os, max_item_, 1);
|
428
431
|
}
|
429
432
|
if (raw_items) {
|
430
433
|
sd.serialize(os, compactors_[0].begin(), num_raw_items);
|
@@ -433,9 +436,9 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
|
|
433
436
|
}
|
434
437
|
}
|
435
438
|
|
436
|
-
template<typename T, typename C, typename
|
439
|
+
template<typename T, typename C, typename A>
|
437
440
|
template<typename SerDe>
|
438
|
-
auto req_sketch<T, C,
|
441
|
+
auto req_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
|
439
442
|
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
|
440
443
|
vector_bytes bytes(size, 0, allocator_);
|
441
444
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
@@ -463,8 +466,8 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe&
|
|
463
466
|
if (!is_empty()) {
|
464
467
|
if (is_estimation_mode()) {
|
465
468
|
ptr += copy_to_mem(n_, ptr);
|
466
|
-
ptr += sd.serialize(ptr, end_ptr - ptr,
|
467
|
-
ptr += sd.serialize(ptr, end_ptr - ptr,
|
469
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, min_item_, 1);
|
470
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, max_item_, 1);
|
468
471
|
}
|
469
472
|
if (raw_items) {
|
470
473
|
ptr += sd.serialize(ptr, end_ptr - ptr, compactors_[0].begin(), num_raw_items);
|
@@ -475,14 +478,9 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe&
|
|
475
478
|
return bytes;
|
476
479
|
}
|
477
480
|
|
478
|
-
template<typename T, typename C, typename
|
479
|
-
req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
|
480
|
-
return deserialize(is, S(), allocator);
|
481
|
-
}
|
482
|
-
|
483
|
-
template<typename T, typename C, typename S, typename A>
|
481
|
+
template<typename T, typename C, typename A>
|
484
482
|
template<typename SerDe>
|
485
|
-
req_sketch<T, C,
|
483
|
+
req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(std::istream& is, const SerDe& sd, const C& comparator, const A& allocator) {
|
486
484
|
const auto preamble_ints = read<uint8_t>(is);
|
487
485
|
const auto serial_version = read<uint8_t>(is);
|
488
486
|
const auto family_id = read<uint8_t>(is);
|
@@ -498,14 +496,14 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
498
496
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
499
497
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
500
498
|
const bool hra = flags_byte & (1 << flags::IS_HIGH_RANK);
|
501
|
-
if (is_empty) return req_sketch(k, hra, allocator);
|
499
|
+
if (is_empty) return req_sketch(k, hra, comparator, allocator);
|
502
500
|
|
503
501
|
A alloc(allocator);
|
504
502
|
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
505
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
506
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
507
|
-
std::unique_ptr<T, item_deleter>
|
508
|
-
std::unique_ptr<T, item_deleter>
|
503
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
504
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
505
|
+
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
506
|
+
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
509
507
|
|
510
508
|
const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
|
511
509
|
const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
|
@@ -514,19 +512,19 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
514
512
|
uint64_t n = 1;
|
515
513
|
if (num_levels > 1) {
|
516
514
|
n = read<uint64_t>(is);
|
517
|
-
sd.deserialize(is,
|
515
|
+
sd.deserialize(is, min_item_buffer.get(), 1);
|
518
516
|
// serde call did not throw, repackage with destrtuctor
|
519
|
-
|
520
|
-
sd.deserialize(is,
|
517
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
518
|
+
sd.deserialize(is, max_item_buffer.get(), 1);
|
521
519
|
// serde call did not throw, repackage with destrtuctor
|
522
|
-
|
520
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
523
521
|
}
|
524
522
|
|
525
523
|
if (raw_items) {
|
526
|
-
compactors.push_back(Compactor::deserialize(is, sd, allocator, is_level_0_sorted, k, num_raw_items, hra));
|
524
|
+
compactors.push_back(Compactor::deserialize(is, sd, comparator, allocator, is_level_0_sorted, k, num_raw_items, hra));
|
527
525
|
} else {
|
528
526
|
for (size_t i = 0; i < num_levels; ++i) {
|
529
|
-
compactors.push_back(Compactor::deserialize(is, sd, allocator, i == 0 ? is_level_0_sorted : true, hra));
|
527
|
+
compactors.push_back(Compactor::deserialize(is, sd, comparator, allocator, i == 0 ? is_level_0_sorted : true, hra));
|
530
528
|
}
|
531
529
|
}
|
532
530
|
if (num_levels == 1) {
|
@@ -536,29 +534,24 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
536
534
|
auto min_it = begin;
|
537
535
|
auto max_it = begin;
|
538
536
|
for (auto it = begin; it != end; ++it) {
|
539
|
-
if (
|
540
|
-
if (
|
537
|
+
if (comparator(*it, *min_it)) min_it = it;
|
538
|
+
if (comparator(*max_it, *it)) max_it = it;
|
541
539
|
}
|
542
|
-
new (
|
540
|
+
new (min_item_buffer.get()) T(*min_it);
|
543
541
|
// copy did not throw, repackage with destrtuctor
|
544
|
-
|
545
|
-
new (
|
542
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
543
|
+
new (max_item_buffer.get()) T(*max_it);
|
546
544
|
// copy did not throw, repackage with destrtuctor
|
547
|
-
|
545
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
548
546
|
}
|
549
547
|
|
550
548
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
551
|
-
return req_sketch(k, hra, n, std::move(
|
552
|
-
}
|
553
|
-
|
554
|
-
template<typename T, typename C, typename S, typename A>
|
555
|
-
req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
556
|
-
return deserialize(bytes, size, S(), allocator);
|
549
|
+
return req_sketch(k, hra, n, std::move(min_item), std::move(max_item), std::move(compactors), comparator);
|
557
550
|
}
|
558
551
|
|
559
|
-
template<typename T, typename C, typename
|
552
|
+
template<typename T, typename C, typename A>
|
560
553
|
template<typename SerDe>
|
561
|
-
req_sketch<T, C,
|
554
|
+
req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const C& comparator, const A& allocator) {
|
562
555
|
ensure_minimum_memory(size, 8);
|
563
556
|
const char* ptr = static_cast<const char*>(bytes);
|
564
557
|
const char* end_ptr = static_cast<const char*>(bytes) + size;
|
@@ -584,14 +577,14 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
584
577
|
|
585
578
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
586
579
|
const bool hra = flags_byte & (1 << flags::IS_HIGH_RANK);
|
587
|
-
if (is_empty) return req_sketch(k, hra, allocator);
|
580
|
+
if (is_empty) return req_sketch(k, hra, comparator, allocator);
|
588
581
|
|
589
582
|
A alloc(allocator);
|
590
583
|
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
591
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
592
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
593
|
-
std::unique_ptr<T, item_deleter>
|
594
|
-
std::unique_ptr<T, item_deleter>
|
584
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
585
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
586
|
+
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
587
|
+
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
595
588
|
|
596
589
|
const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
|
597
590
|
const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
|
@@ -601,21 +594,21 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
601
594
|
if (num_levels > 1) {
|
602
595
|
ensure_minimum_memory(end_ptr - ptr, sizeof(n));
|
603
596
|
ptr += copy_from_mem(ptr, n);
|
604
|
-
ptr += sd.deserialize(ptr, end_ptr - ptr,
|
597
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
|
605
598
|
// serde call did not throw, repackage with destrtuctor
|
606
|
-
|
607
|
-
ptr += sd.deserialize(ptr, end_ptr - ptr,
|
599
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
600
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
|
608
601
|
// serde call did not throw, repackage with destrtuctor
|
609
|
-
|
602
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
610
603
|
}
|
611
604
|
|
612
605
|
if (raw_items) {
|
613
|
-
auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, allocator, is_level_0_sorted, k, num_raw_items, hra);
|
606
|
+
auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, comparator, allocator, is_level_0_sorted, k, num_raw_items, hra);
|
614
607
|
compactors.push_back(std::move(pair.first));
|
615
608
|
ptr += pair.second;
|
616
609
|
} else {
|
617
610
|
for (size_t i = 0; i < num_levels; ++i) {
|
618
|
-
auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, allocator, i == 0 ? is_level_0_sorted : true, hra);
|
611
|
+
auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, comparator, allocator, i == 0 ? is_level_0_sorted : true, hra);
|
619
612
|
compactors.push_back(std::move(pair.first));
|
620
613
|
ptr += pair.second;
|
621
614
|
}
|
@@ -627,46 +620,46 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
627
620
|
auto min_it = begin;
|
628
621
|
auto max_it = begin;
|
629
622
|
for (auto it = begin; it != end; ++it) {
|
630
|
-
if (
|
631
|
-
if (
|
623
|
+
if (comparator(*it, *min_it)) min_it = it;
|
624
|
+
if (comparator(*max_it, *it)) max_it = it;
|
632
625
|
}
|
633
|
-
new (
|
626
|
+
new (min_item_buffer.get()) T(*min_it);
|
634
627
|
// copy did not throw, repackage with destrtuctor
|
635
|
-
|
636
|
-
new (
|
628
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
629
|
+
new (max_item_buffer.get()) T(*max_it);
|
637
630
|
// copy did not throw, repackage with destrtuctor
|
638
|
-
|
631
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
639
632
|
}
|
640
633
|
|
641
|
-
return req_sketch(k, hra, n, std::move(
|
634
|
+
return req_sketch(k, hra, n, std::move(min_item), std::move(max_item), std::move(compactors), comparator);
|
642
635
|
}
|
643
636
|
|
644
|
-
template<typename T, typename C, typename
|
645
|
-
void req_sketch<T, C,
|
637
|
+
template<typename T, typename C, typename A>
|
638
|
+
void req_sketch<T, C, A>::grow() {
|
646
639
|
const uint8_t lg_weight = get_num_levels();
|
647
|
-
compactors_.push_back(Compactor(hra_, lg_weight, k_, allocator_));
|
640
|
+
compactors_.push_back(Compactor(hra_, lg_weight, k_, comparator_, allocator_));
|
648
641
|
update_max_nom_size();
|
649
642
|
}
|
650
643
|
|
651
|
-
template<typename T, typename C, typename
|
652
|
-
uint8_t req_sketch<T, C,
|
644
|
+
template<typename T, typename C, typename A>
|
645
|
+
uint8_t req_sketch<T, C, A>::get_num_levels() const {
|
653
646
|
return static_cast<uint8_t>(compactors_.size());
|
654
647
|
}
|
655
648
|
|
656
|
-
template<typename T, typename C, typename
|
657
|
-
void req_sketch<T, C,
|
649
|
+
template<typename T, typename C, typename A>
|
650
|
+
void req_sketch<T, C, A>::update_max_nom_size() {
|
658
651
|
max_nom_size_ = 0;
|
659
652
|
for (const auto& compactor: compactors_) max_nom_size_ += compactor.get_nom_capacity();
|
660
653
|
}
|
661
654
|
|
662
|
-
template<typename T, typename C, typename
|
663
|
-
void req_sketch<T, C,
|
655
|
+
template<typename T, typename C, typename A>
|
656
|
+
void req_sketch<T, C, A>::update_num_retained() {
|
664
657
|
num_retained_ = 0;
|
665
658
|
for (const auto& compactor: compactors_) num_retained_ += compactor.get_num_items();
|
666
659
|
}
|
667
660
|
|
668
|
-
template<typename T, typename C, typename
|
669
|
-
void req_sketch<T, C,
|
661
|
+
template<typename T, typename C, typename A>
|
662
|
+
void req_sketch<T, C, A>::compress() {
|
670
663
|
for (size_t h = 0; h < compactors_.size(); ++h) {
|
671
664
|
if (compactors_[h].get_num_items() >= compactors_[h].get_nom_capacity()) {
|
672
665
|
if (h == 0) compactors_[0].sort();
|
@@ -681,8 +674,8 @@ void req_sketch<T, C, S, A>::compress() {
|
|
681
674
|
}
|
682
675
|
}
|
683
676
|
|
684
|
-
template<typename T, typename C, typename
|
685
|
-
string<A> req_sketch<T, C,
|
677
|
+
template<typename T, typename C, typename A>
|
678
|
+
string<A> req_sketch<T, C, A>::to_string(bool print_levels, bool print_items) const {
|
686
679
|
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
687
680
|
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
688
681
|
std::ostringstream os;
|
@@ -697,8 +690,8 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
|
|
697
690
|
os << " Retained items : " << num_retained_ << std::endl;
|
698
691
|
os << " Capacity items : " << max_nom_size_ << std::endl;
|
699
692
|
if (!is_empty()) {
|
700
|
-
os << " Min
|
701
|
-
os << " Max
|
693
|
+
os << " Min item : " << *min_item_ << std::endl;
|
694
|
+
os << " Max item : " << *max_item_ << std::endl;
|
702
695
|
}
|
703
696
|
os << "### End sketch summary" << std::endl;
|
704
697
|
|
@@ -728,8 +721,8 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
|
|
728
721
|
return string<A>(os.str().c_str(), allocator_);
|
729
722
|
}
|
730
723
|
|
731
|
-
template<typename T, typename C, typename
|
732
|
-
class req_sketch<T, C,
|
724
|
+
template<typename T, typename C, typename A>
|
725
|
+
class req_sketch<T, C, A>::item_deleter {
|
733
726
|
public:
|
734
727
|
item_deleter(const A& allocator): allocator_(allocator) {}
|
735
728
|
void operator() (T* ptr) {
|
@@ -742,8 +735,11 @@ class req_sketch<T, C, S, A>::item_deleter {
|
|
742
735
|
A allocator_;
|
743
736
|
};
|
744
737
|
|
745
|
-
template<typename T, typename C, typename
|
746
|
-
req_sketch<T, C,
|
738
|
+
template<typename T, typename C, typename A>
|
739
|
+
req_sketch<T, C, A>::req_sketch(uint16_t k, bool hra, uint64_t n,
|
740
|
+
std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
|
741
|
+
std::vector<Compactor, AllocCompactor>&& compactors, const C& comparator):
|
742
|
+
comparator_(comparator),
|
747
743
|
allocator_(compactors.get_allocator()),
|
748
744
|
k_(k),
|
749
745
|
hra_(hra),
|
@@ -751,15 +747,16 @@ max_nom_size_(0),
|
|
751
747
|
num_retained_(0),
|
752
748
|
n_(n),
|
753
749
|
compactors_(std::move(compactors)),
|
754
|
-
|
755
|
-
|
750
|
+
min_item_(min_item.release()),
|
751
|
+
max_item_(max_item.release()),
|
752
|
+
sorted_view_(nullptr)
|
756
753
|
{
|
757
754
|
update_max_nom_size();
|
758
755
|
update_num_retained();
|
759
756
|
}
|
760
757
|
|
761
|
-
template<typename T, typename C, typename
|
762
|
-
void req_sketch<T, C,
|
758
|
+
template<typename T, typename C, typename A>
|
759
|
+
void req_sketch<T, C, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels) {
|
763
760
|
const uint8_t expected_preamble_ints = num_levels > 1 ? 4 : 2;
|
764
761
|
if (preamble_ints != expected_preamble_ints) {
|
765
762
|
throw std::invalid_argument("Possible corruption: preamble ints must be "
|
@@ -767,8 +764,8 @@ void req_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t
|
|
767
764
|
}
|
768
765
|
}
|
769
766
|
|
770
|
-
template<typename T, typename C, typename
|
771
|
-
void req_sketch<T, C,
|
767
|
+
template<typename T, typename C, typename A>
|
768
|
+
void req_sketch<T, C, A>::check_serial_version(uint8_t serial_version) {
|
772
769
|
if (serial_version != SERIAL_VERSION) {
|
773
770
|
throw std::invalid_argument("Possible corruption: serial version mismatch: expected "
|
774
771
|
+ std::to_string(SERIAL_VERSION)
|
@@ -776,35 +773,53 @@ void req_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
|
|
776
773
|
}
|
777
774
|
}
|
778
775
|
|
779
|
-
template<typename T, typename C, typename
|
780
|
-
void req_sketch<T, C,
|
776
|
+
template<typename T, typename C, typename A>
|
777
|
+
void req_sketch<T, C, A>::check_family_id(uint8_t family_id) {
|
781
778
|
if (family_id != FAMILY) {
|
782
779
|
throw std::invalid_argument("Possible corruption: family mismatch: expected "
|
783
780
|
+ std::to_string(FAMILY) + ", got " + std::to_string(family_id));
|
784
781
|
}
|
785
782
|
}
|
786
783
|
|
787
|
-
template<typename T, typename C, typename
|
788
|
-
auto req_sketch<T, C,
|
784
|
+
template<typename T, typename C, typename A>
|
785
|
+
auto req_sketch<T, C, A>::begin() const -> const_iterator {
|
789
786
|
return const_iterator(compactors_.begin(), compactors_.end());
|
790
787
|
}
|
791
788
|
|
792
|
-
template<typename T, typename C, typename
|
793
|
-
auto req_sketch<T, C,
|
789
|
+
template<typename T, typename C, typename A>
|
790
|
+
auto req_sketch<T, C, A>::end() const -> const_iterator {
|
794
791
|
return const_iterator(compactors_.end(), compactors_.end());
|
795
792
|
}
|
796
793
|
|
794
|
+
template<typename T, typename C, typename A>
|
795
|
+
void req_sketch<T, C, A>::setup_sorted_view() const {
|
796
|
+
if (sorted_view_ == nullptr) {
|
797
|
+
using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
|
798
|
+
sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
|
799
|
+
}
|
800
|
+
}
|
801
|
+
|
802
|
+
template<typename T, typename C, typename A>
|
803
|
+
void req_sketch<T, C, A>::reset_sorted_view() {
|
804
|
+
if (sorted_view_ != nullptr) {
|
805
|
+
sorted_view_->~quantiles_sorted_view();
|
806
|
+
using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
|
807
|
+
AllocSortedView(allocator_).deallocate(sorted_view_, 1);
|
808
|
+
sorted_view_ = nullptr;
|
809
|
+
}
|
810
|
+
}
|
811
|
+
|
797
812
|
// iterator
|
798
813
|
|
799
|
-
template<typename T, typename C, typename
|
800
|
-
req_sketch<T, C,
|
814
|
+
template<typename T, typename C, typename A>
|
815
|
+
req_sketch<T, C, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
|
801
816
|
levels_it_(begin),
|
802
817
|
levels_end_(end),
|
803
818
|
compactor_it_(begin == end ? nullptr : (*levels_it_).begin())
|
804
819
|
{}
|
805
820
|
|
806
|
-
template<typename T, typename C, typename
|
807
|
-
auto req_sketch<T, C,
|
821
|
+
template<typename T, typename C, typename A>
|
822
|
+
auto req_sketch<T, C, A>::const_iterator::operator++() -> const_iterator& {
|
808
823
|
++compactor_it_;
|
809
824
|
if (compactor_it_ == (*levels_it_).end()) {
|
810
825
|
++levels_it_;
|
@@ -813,28 +828,33 @@ auto req_sketch<T, C, S, A>::const_iterator::operator++() -> const_iterator& {
|
|
813
828
|
return *this;
|
814
829
|
}
|
815
830
|
|
816
|
-
template<typename T, typename C, typename
|
817
|
-
auto req_sketch<T, C,
|
831
|
+
template<typename T, typename C, typename A>
|
832
|
+
auto req_sketch<T, C, A>::const_iterator::operator++(int) -> const_iterator& {
|
818
833
|
const_iterator tmp(*this);
|
819
834
|
operator++();
|
820
835
|
return tmp;
|
821
836
|
}
|
822
837
|
|
823
|
-
template<typename T, typename C, typename
|
824
|
-
bool req_sketch<T, C,
|
838
|
+
template<typename T, typename C, typename A>
|
839
|
+
bool req_sketch<T, C, A>::const_iterator::operator==(const const_iterator& other) const {
|
825
840
|
if (levels_it_ != other.levels_it_) return false;
|
826
841
|
if (levels_it_ == levels_end_) return true;
|
827
842
|
return compactor_it_ == other.compactor_it_;
|
828
843
|
}
|
829
844
|
|
830
|
-
template<typename T, typename C, typename
|
831
|
-
bool req_sketch<T, C,
|
845
|
+
template<typename T, typename C, typename A>
|
846
|
+
bool req_sketch<T, C, A>::const_iterator::operator!=(const const_iterator& other) const {
|
832
847
|
return !operator==(other);
|
833
848
|
}
|
834
849
|
|
835
|
-
template<typename T, typename C, typename
|
836
|
-
|
837
|
-
return
|
850
|
+
template<typename T, typename C, typename A>
|
851
|
+
auto req_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
|
852
|
+
return value_type(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
|
853
|
+
}
|
854
|
+
|
855
|
+
template<typename T, typename C, typename A>
|
856
|
+
auto req_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
|
857
|
+
return **this;
|
838
858
|
}
|
839
859
|
|
840
860
|
} /* namespace datasketches */
|