datasketches 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
|
@@ -32,20 +32,22 @@
|
|
|
32
32
|
|
|
33
33
|
namespace datasketches {
|
|
34
34
|
|
|
35
|
-
template<typename T, typename C, typename
|
|
36
|
-
kll_sketch<T, C,
|
|
35
|
+
template<typename T, typename C, typename A>
|
|
36
|
+
kll_sketch<T, C, A>::kll_sketch(uint16_t k, const C& comparator, const A& allocator):
|
|
37
|
+
comparator_(comparator),
|
|
37
38
|
allocator_(allocator),
|
|
38
39
|
k_(k),
|
|
39
40
|
m_(DEFAULT_M),
|
|
40
41
|
min_k_(k),
|
|
41
|
-
n_(0),
|
|
42
42
|
num_levels_(1),
|
|
43
|
+
is_level_zero_sorted_(false),
|
|
44
|
+
n_(0),
|
|
43
45
|
levels_(2, 0, allocator),
|
|
44
46
|
items_(nullptr),
|
|
45
47
|
items_size_(k_),
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
48
|
+
min_item_(nullptr),
|
|
49
|
+
max_item_(nullptr),
|
|
50
|
+
sorted_view_(nullptr)
|
|
49
51
|
{
|
|
50
52
|
if (k < MIN_K || k > MAX_K) {
|
|
51
53
|
throw std::invalid_argument("K must be >= " + std::to_string(MIN_K) + " and <= " + std::to_string(MAX_K) + ": " + std::to_string(k));
|
|
@@ -54,115 +56,126 @@ is_level_zero_sorted_(false)
|
|
|
54
56
|
items_ = allocator_.allocate(items_size_);
|
|
55
57
|
}
|
|
56
58
|
|
|
57
|
-
template<typename T, typename C, typename
|
|
58
|
-
kll_sketch<T, C,
|
|
59
|
+
template<typename T, typename C, typename A>
|
|
60
|
+
kll_sketch<T, C, A>::kll_sketch(const kll_sketch& other):
|
|
61
|
+
comparator_(other.comparator_),
|
|
59
62
|
allocator_(other.allocator_),
|
|
60
63
|
k_(other.k_),
|
|
61
64
|
m_(other.m_),
|
|
62
65
|
min_k_(other.min_k_),
|
|
63
|
-
n_(other.n_),
|
|
64
66
|
num_levels_(other.num_levels_),
|
|
67
|
+
is_level_zero_sorted_(other.is_level_zero_sorted_),
|
|
68
|
+
n_(other.n_),
|
|
65
69
|
levels_(other.levels_),
|
|
66
70
|
items_(nullptr),
|
|
67
71
|
items_size_(other.items_size_),
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
72
|
+
min_item_(nullptr),
|
|
73
|
+
max_item_(nullptr),
|
|
74
|
+
sorted_view_(nullptr)
|
|
71
75
|
{
|
|
72
76
|
items_ = allocator_.allocate(items_size_);
|
|
73
77
|
for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
|
|
74
|
-
if (other.
|
|
75
|
-
if (other.
|
|
78
|
+
if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
|
|
79
|
+
if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
|
|
76
80
|
}
|
|
77
81
|
|
|
78
|
-
template<typename T, typename C, typename
|
|
79
|
-
kll_sketch<T, C,
|
|
82
|
+
template<typename T, typename C, typename A>
|
|
83
|
+
kll_sketch<T, C, A>::kll_sketch(kll_sketch&& other) noexcept:
|
|
84
|
+
comparator_(std::move(other.comparator_)),
|
|
80
85
|
allocator_(std::move(other.allocator_)),
|
|
81
86
|
k_(other.k_),
|
|
82
87
|
m_(other.m_),
|
|
83
88
|
min_k_(other.min_k_),
|
|
84
|
-
n_(other.n_),
|
|
85
89
|
num_levels_(other.num_levels_),
|
|
90
|
+
is_level_zero_sorted_(other.is_level_zero_sorted_),
|
|
91
|
+
n_(other.n_),
|
|
86
92
|
levels_(std::move(other.levels_)),
|
|
87
93
|
items_(other.items_),
|
|
88
94
|
items_size_(other.items_size_),
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
95
|
+
min_item_(other.min_item_),
|
|
96
|
+
max_item_(other.max_item_),
|
|
97
|
+
sorted_view_(nullptr)
|
|
92
98
|
{
|
|
93
99
|
other.items_ = nullptr;
|
|
94
|
-
other.
|
|
95
|
-
other.
|
|
100
|
+
other.min_item_ = nullptr;
|
|
101
|
+
other.max_item_ = nullptr;
|
|
96
102
|
}
|
|
97
103
|
|
|
98
|
-
template<typename T, typename C, typename
|
|
99
|
-
kll_sketch<T, C,
|
|
100
|
-
kll_sketch
|
|
104
|
+
template<typename T, typename C, typename A>
|
|
105
|
+
kll_sketch<T, C, A>& kll_sketch<T, C, A>::operator=(const kll_sketch& other) {
|
|
106
|
+
kll_sketch copy(other);
|
|
107
|
+
std::swap(comparator_, copy.comparator_);
|
|
101
108
|
std::swap(allocator_, copy.allocator_);
|
|
102
109
|
std::swap(k_, copy.k_);
|
|
103
110
|
std::swap(m_, copy.m_);
|
|
104
111
|
std::swap(min_k_, copy.min_k_);
|
|
105
|
-
std::swap(n_, copy.n_);
|
|
106
112
|
std::swap(num_levels_, copy.num_levels_);
|
|
113
|
+
std::swap(is_level_zero_sorted_, copy.is_level_zero_sorted_);
|
|
114
|
+
std::swap(n_, copy.n_);
|
|
107
115
|
std::swap(levels_, copy.levels_);
|
|
108
116
|
std::swap(items_, copy.items_);
|
|
109
117
|
std::swap(items_size_, copy.items_size_);
|
|
110
|
-
std::swap(
|
|
111
|
-
std::swap(
|
|
112
|
-
|
|
118
|
+
std::swap(min_item_, copy.min_item_);
|
|
119
|
+
std::swap(max_item_, copy.max_item_);
|
|
120
|
+
reset_sorted_view();
|
|
113
121
|
return *this;
|
|
114
122
|
}
|
|
115
123
|
|
|
116
|
-
template<typename T, typename C, typename
|
|
117
|
-
kll_sketch<T, C,
|
|
124
|
+
template<typename T, typename C, typename A>
|
|
125
|
+
kll_sketch<T, C, A>& kll_sketch<T, C, A>::operator=(kll_sketch&& other) {
|
|
126
|
+
std::swap(comparator_, other.comparator_);
|
|
118
127
|
std::swap(allocator_, other.allocator_);
|
|
119
128
|
std::swap(k_, other.k_);
|
|
120
129
|
std::swap(m_, other.m_);
|
|
121
130
|
std::swap(min_k_, other.min_k_);
|
|
122
|
-
std::swap(n_, other.n_);
|
|
123
131
|
std::swap(num_levels_, other.num_levels_);
|
|
132
|
+
std::swap(is_level_zero_sorted_, other.is_level_zero_sorted_);
|
|
133
|
+
std::swap(n_, other.n_);
|
|
124
134
|
std::swap(levels_, other.levels_);
|
|
125
135
|
std::swap(items_, other.items_);
|
|
126
136
|
std::swap(items_size_, other.items_size_);
|
|
127
|
-
std::swap(
|
|
128
|
-
std::swap(
|
|
129
|
-
|
|
137
|
+
std::swap(min_item_, other.min_item_);
|
|
138
|
+
std::swap(max_item_, other.max_item_);
|
|
139
|
+
reset_sorted_view();
|
|
130
140
|
return *this;
|
|
131
141
|
}
|
|
132
142
|
|
|
133
|
-
template<typename T, typename C, typename
|
|
134
|
-
kll_sketch<T, C,
|
|
143
|
+
template<typename T, typename C, typename A>
|
|
144
|
+
kll_sketch<T, C, A>::~kll_sketch() {
|
|
135
145
|
if (items_ != nullptr) {
|
|
136
146
|
const uint32_t begin = levels_[0];
|
|
137
147
|
const uint32_t end = levels_[num_levels_];
|
|
138
148
|
for (uint32_t i = begin; i < end; i++) items_[i].~T();
|
|
139
149
|
allocator_.deallocate(items_, items_size_);
|
|
140
150
|
}
|
|
141
|
-
if (
|
|
142
|
-
|
|
143
|
-
allocator_.deallocate(
|
|
151
|
+
if (min_item_ != nullptr) {
|
|
152
|
+
min_item_->~T();
|
|
153
|
+
allocator_.deallocate(min_item_, 1);
|
|
144
154
|
}
|
|
145
|
-
if (
|
|
146
|
-
|
|
147
|
-
allocator_.deallocate(
|
|
155
|
+
if (max_item_ != nullptr) {
|
|
156
|
+
max_item_->~T();
|
|
157
|
+
allocator_.deallocate(max_item_, 1);
|
|
148
158
|
}
|
|
159
|
+
reset_sorted_view();
|
|
149
160
|
}
|
|
150
161
|
|
|
151
|
-
template<typename T, typename C, typename
|
|
152
|
-
template<typename TT, typename CC, typename
|
|
153
|
-
kll_sketch<T, C,
|
|
162
|
+
template<typename T, typename C, typename A>
|
|
163
|
+
template<typename TT, typename CC, typename AA>
|
|
164
|
+
kll_sketch<T, C, A>::kll_sketch(const kll_sketch<TT, CC, AA>& other, const C& comparator, const A& allocator):
|
|
165
|
+
comparator_(comparator),
|
|
154
166
|
allocator_(allocator),
|
|
155
167
|
k_(other.k_),
|
|
156
168
|
m_(other.m_),
|
|
157
169
|
min_k_(other.min_k_),
|
|
158
|
-
n_(other.n_),
|
|
159
170
|
num_levels_(other.num_levels_),
|
|
171
|
+
is_level_zero_sorted_(other.is_level_zero_sorted_),
|
|
172
|
+
n_(other.n_),
|
|
160
173
|
levels_(other.levels_, allocator_),
|
|
161
174
|
items_(nullptr),
|
|
162
175
|
items_size_(other.items_size_),
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
176
|
+
min_item_(nullptr),
|
|
177
|
+
max_item_(nullptr),
|
|
178
|
+
sorted_view_(nullptr)
|
|
166
179
|
{
|
|
167
180
|
static_assert(
|
|
168
181
|
std::is_constructible<T, TT>::value,
|
|
@@ -170,52 +183,53 @@ is_level_zero_sorted_(other.is_level_zero_sorted_)
|
|
|
170
183
|
);
|
|
171
184
|
items_ = allocator_.allocate(items_size_);
|
|
172
185
|
for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
|
|
173
|
-
if (other.
|
|
174
|
-
if (other.
|
|
186
|
+
if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
|
|
187
|
+
if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
|
|
175
188
|
check_sorting();
|
|
176
189
|
}
|
|
177
190
|
|
|
178
|
-
template<typename T, typename C, typename
|
|
191
|
+
template<typename T, typename C, typename A>
|
|
179
192
|
template<typename FwdT>
|
|
180
|
-
void kll_sketch<T, C,
|
|
181
|
-
if (!
|
|
182
|
-
update_min_max(
|
|
193
|
+
void kll_sketch<T, C, A>::update(FwdT&& item) {
|
|
194
|
+
if (!check_update_item(item)) { return; }
|
|
195
|
+
update_min_max(item);
|
|
183
196
|
const uint32_t index = internal_update();
|
|
184
|
-
new (&items_[index]) T(std::forward<FwdT>(
|
|
197
|
+
new (&items_[index]) T(std::forward<FwdT>(item));
|
|
198
|
+
reset_sorted_view();
|
|
185
199
|
}
|
|
186
200
|
|
|
187
|
-
template<typename T, typename C, typename
|
|
188
|
-
void kll_sketch<T, C,
|
|
201
|
+
template<typename T, typename C, typename A>
|
|
202
|
+
void kll_sketch<T, C, A>::update_min_max(const T& item) {
|
|
189
203
|
if (is_empty()) {
|
|
190
|
-
|
|
191
|
-
|
|
204
|
+
min_item_ = new (allocator_.allocate(1)) T(item);
|
|
205
|
+
max_item_ = new (allocator_.allocate(1)) T(item);
|
|
192
206
|
} else {
|
|
193
|
-
if (
|
|
194
|
-
if (
|
|
207
|
+
if (comparator_(item, *min_item_)) *min_item_ = item;
|
|
208
|
+
if (comparator_(*max_item_, item)) *max_item_ = item;
|
|
195
209
|
}
|
|
196
210
|
}
|
|
197
211
|
|
|
198
|
-
template<typename T, typename C, typename
|
|
199
|
-
uint32_t kll_sketch<T, C,
|
|
212
|
+
template<typename T, typename C, typename A>
|
|
213
|
+
uint32_t kll_sketch<T, C, A>::internal_update() {
|
|
200
214
|
if (levels_[0] == 0) compress_while_updating();
|
|
201
215
|
n_++;
|
|
202
216
|
is_level_zero_sorted_ = false;
|
|
203
217
|
return --levels_[0];
|
|
204
218
|
}
|
|
205
219
|
|
|
206
|
-
template<typename T, typename C, typename
|
|
220
|
+
template<typename T, typename C, typename A>
|
|
207
221
|
template<typename FwdSk>
|
|
208
|
-
void kll_sketch<T, C,
|
|
222
|
+
void kll_sketch<T, C, A>::merge(FwdSk&& other) {
|
|
209
223
|
if (other.is_empty()) return;
|
|
210
224
|
if (m_ != other.m_) {
|
|
211
225
|
throw std::invalid_argument("incompatible M: " + std::to_string(m_) + " and " + std::to_string(other.m_));
|
|
212
226
|
}
|
|
213
227
|
if (is_empty()) {
|
|
214
|
-
|
|
215
|
-
|
|
228
|
+
min_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.min_item_));
|
|
229
|
+
max_item_ = new (allocator_.allocate(1)) T(conditional_forward<FwdSk>(*other.max_item_));
|
|
216
230
|
} else {
|
|
217
|
-
if (
|
|
218
|
-
if (
|
|
231
|
+
if (comparator_(*other.min_item_, *min_item_)) *min_item_ = conditional_forward<FwdSk>(*other.min_item_);
|
|
232
|
+
if (comparator_(*max_item_, *other.max_item_)) *max_item_ = conditional_forward<FwdSk>(*other.max_item_);
|
|
219
233
|
}
|
|
220
234
|
const uint64_t final_n = n_ + other.n_;
|
|
221
235
|
for (uint32_t i = other.levels_[0]; i < other.levels_[1]; i++) {
|
|
@@ -226,149 +240,133 @@ void kll_sketch<T, C, S, A>::merge(FwdSk&& other) {
|
|
|
226
240
|
n_ = final_n;
|
|
227
241
|
if (other.is_estimation_mode()) min_k_ = std::min(min_k_, other.min_k_);
|
|
228
242
|
assert_correct_total_weight();
|
|
243
|
+
reset_sorted_view();
|
|
229
244
|
}
|
|
230
245
|
|
|
231
|
-
template<typename T, typename C, typename
|
|
232
|
-
bool kll_sketch<T, C,
|
|
246
|
+
template<typename T, typename C, typename A>
|
|
247
|
+
bool kll_sketch<T, C, A>::is_empty() const {
|
|
233
248
|
return n_ == 0;
|
|
234
249
|
}
|
|
235
250
|
|
|
236
|
-
template<typename T, typename C, typename
|
|
237
|
-
uint16_t kll_sketch<T, C,
|
|
251
|
+
template<typename T, typename C, typename A>
|
|
252
|
+
uint16_t kll_sketch<T, C, A>::get_k() const {
|
|
238
253
|
return k_;
|
|
239
254
|
}
|
|
240
255
|
|
|
241
|
-
template<typename T, typename C, typename
|
|
242
|
-
uint64_t kll_sketch<T, C,
|
|
256
|
+
template<typename T, typename C, typename A>
|
|
257
|
+
uint64_t kll_sketch<T, C, A>::get_n() const {
|
|
243
258
|
return n_;
|
|
244
259
|
}
|
|
245
260
|
|
|
246
|
-
template<typename T, typename C, typename
|
|
247
|
-
uint32_t kll_sketch<T, C,
|
|
261
|
+
template<typename T, typename C, typename A>
|
|
262
|
+
uint32_t kll_sketch<T, C, A>::get_num_retained() const {
|
|
248
263
|
return levels_[num_levels_] - levels_[0];
|
|
249
264
|
}
|
|
250
265
|
|
|
251
|
-
template<typename T, typename C, typename
|
|
252
|
-
bool kll_sketch<T, C,
|
|
266
|
+
template<typename T, typename C, typename A>
|
|
267
|
+
bool kll_sketch<T, C, A>::is_estimation_mode() const {
|
|
253
268
|
return num_levels_ > 1;
|
|
254
269
|
}
|
|
255
270
|
|
|
256
|
-
template<typename T, typename C, typename
|
|
257
|
-
T kll_sketch<T, C,
|
|
258
|
-
if (is_empty())
|
|
259
|
-
return *
|
|
271
|
+
template<typename T, typename C, typename A>
|
|
272
|
+
T kll_sketch<T, C, A>::get_min_item() const {
|
|
273
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
274
|
+
return *min_item_;
|
|
260
275
|
}
|
|
261
276
|
|
|
262
|
-
template<typename T, typename C, typename
|
|
263
|
-
T kll_sketch<T, C,
|
|
264
|
-
if (is_empty())
|
|
265
|
-
return *
|
|
277
|
+
template<typename T, typename C, typename A>
|
|
278
|
+
T kll_sketch<T, C, A>::get_max_item() const {
|
|
279
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
280
|
+
return *max_item_;
|
|
266
281
|
}
|
|
267
282
|
|
|
268
|
-
template<typename T, typename C, typename
|
|
269
|
-
C kll_sketch<T, C,
|
|
270
|
-
return
|
|
283
|
+
template<typename T, typename C, typename A>
|
|
284
|
+
C kll_sketch<T, C, A>::get_comparator() const {
|
|
285
|
+
return comparator_;
|
|
271
286
|
}
|
|
272
287
|
|
|
273
|
-
template<typename T, typename C, typename
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
288
|
+
template<typename T, typename C, typename A>
|
|
289
|
+
A kll_sketch<T, C, A>::get_allocator() const {
|
|
290
|
+
return allocator_;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
template<typename T, typename C, typename A>
|
|
294
|
+
double kll_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
|
|
295
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
296
|
+
setup_sorted_view();
|
|
297
|
+
return sorted_view_->get_rank(item, inclusive);
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
template<typename T, typename C, typename A>
|
|
301
|
+
auto kll_sketch<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
|
|
302
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
303
|
+
setup_sorted_view();
|
|
304
|
+
return sorted_view_->get_PMF(split_points, size, inclusive);
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
template<typename T, typename C, typename A>
|
|
308
|
+
auto kll_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
|
|
309
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
310
|
+
setup_sorted_view();
|
|
311
|
+
return sorted_view_->get_CDF(split_points, size, inclusive);
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
template<typename T, typename C, typename A>
|
|
315
|
+
auto kll_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
|
|
316
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
279
317
|
if ((rank < 0.0) || (rank > 1.0)) {
|
|
280
|
-
throw std::invalid_argument("
|
|
318
|
+
throw std::invalid_argument("normalized rank cannot be less than zero or greater than 1.0");
|
|
281
319
|
}
|
|
282
320
|
// may have a side effect of sorting level zero if needed
|
|
283
|
-
|
|
321
|
+
setup_sorted_view();
|
|
322
|
+
return sorted_view_->get_quantile(rank, inclusive);
|
|
284
323
|
}
|
|
285
324
|
|
|
286
|
-
template<typename T, typename C, typename
|
|
287
|
-
|
|
288
|
-
|
|
325
|
+
template<typename T, typename C, typename A>
|
|
326
|
+
std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
|
|
327
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
289
328
|
std::vector<T, A> quantiles(allocator_);
|
|
290
|
-
if (is_empty()) return quantiles;
|
|
291
329
|
quantiles.reserve(size);
|
|
292
330
|
|
|
293
331
|
// may have a side effect of sorting level zero if needed
|
|
294
|
-
|
|
332
|
+
setup_sorted_view();
|
|
295
333
|
|
|
296
334
|
for (uint32_t i = 0; i < size; i++) {
|
|
297
335
|
const double rank = ranks[i];
|
|
298
336
|
if ((rank < 0.0) || (rank > 1.0)) {
|
|
299
|
-
throw std::invalid_argument("
|
|
300
|
-
}
|
|
301
|
-
else if (rank == 0.0) quantiles.push_back(*min_value_);
|
|
302
|
-
else if (rank == 1.0) quantiles.push_back(*max_value_);
|
|
303
|
-
else {
|
|
304
|
-
quantiles.push_back(view.get_quantile(rank));
|
|
337
|
+
throw std::invalid_argument("normalized rank cannot be less than 0 or greater than 1");
|
|
305
338
|
}
|
|
339
|
+
quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
|
|
306
340
|
}
|
|
307
341
|
return quantiles;
|
|
308
342
|
}
|
|
309
343
|
|
|
310
|
-
template<typename T, typename C, typename
|
|
311
|
-
|
|
312
|
-
std::
|
|
313
|
-
if (is_empty()) return std::vector<T, A>(allocator_);
|
|
344
|
+
template<typename T, typename C, typename A>
|
|
345
|
+
std::vector<T, A> kll_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
|
|
346
|
+
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
314
347
|
if (num == 0) {
|
|
315
348
|
throw std::invalid_argument("num must be > 0");
|
|
316
349
|
}
|
|
317
|
-
|
|
318
|
-
|
|
350
|
+
vector_double ranks(num, 0, allocator_);
|
|
351
|
+
ranks[0] = 0.0;
|
|
319
352
|
for (size_t i = 1; i < num; i++) {
|
|
320
|
-
|
|
353
|
+
ranks[i] = static_cast<double>(i) / (num - 1);
|
|
321
354
|
}
|
|
322
355
|
if (num > 1) {
|
|
323
|
-
|
|
324
|
-
}
|
|
325
|
-
return get_quantiles<inclusive>(fractions.data(), num);
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
template<typename T, typename C, typename S, typename A>
|
|
329
|
-
template<bool inclusive>
|
|
330
|
-
double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
|
|
331
|
-
if (is_empty()) return std::numeric_limits<double>::quiet_NaN();
|
|
332
|
-
uint8_t level = 0;
|
|
333
|
-
uint64_t weight = 1;
|
|
334
|
-
uint64_t total = 0;
|
|
335
|
-
while (level < num_levels_) {
|
|
336
|
-
const auto from_index = levels_[level];
|
|
337
|
-
const auto to_index = levels_[level + 1]; // exclusive
|
|
338
|
-
for (uint32_t i = from_index; i < to_index; i++) {
|
|
339
|
-
if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
|
|
340
|
-
total += weight;
|
|
341
|
-
} else if ((level > 0) || is_level_zero_sorted_) {
|
|
342
|
-
break; // levels above 0 are sorted, no point comparing further
|
|
343
|
-
}
|
|
344
|
-
}
|
|
345
|
-
level++;
|
|
346
|
-
weight *= 2;
|
|
356
|
+
ranks[num - 1] = 1.0;
|
|
347
357
|
}
|
|
348
|
-
return (
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
template<typename T, typename C, typename S, typename A>
|
|
352
|
-
template<bool inclusive>
|
|
353
|
-
vector_d<A> kll_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const {
|
|
354
|
-
return get_PMF_or_CDF<inclusive>(split_points, size, false);
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
template<typename T, typename C, typename S, typename A>
|
|
358
|
-
template<bool inclusive>
|
|
359
|
-
vector_d<A> kll_sketch<T, C, S, A>::get_CDF(const T* split_points, uint32_t size) const {
|
|
360
|
-
return get_PMF_or_CDF<inclusive>(split_points, size, true);
|
|
358
|
+
return get_quantiles(ranks.data(), num, inclusive);
|
|
361
359
|
}
|
|
362
360
|
|
|
363
|
-
template<typename T, typename C, typename
|
|
364
|
-
double kll_sketch<T, C,
|
|
361
|
+
template<typename T, typename C, typename A>
|
|
362
|
+
double kll_sketch<T, C, A>::get_normalized_rank_error(bool pmf) const {
|
|
365
363
|
return get_normalized_rank_error(min_k_, pmf);
|
|
366
364
|
}
|
|
367
365
|
|
|
368
366
|
// implementation for fixed-size arithmetic types (integral and floating point)
|
|
369
|
-
template<typename T, typename C, typename
|
|
367
|
+
template<typename T, typename C, typename A>
|
|
370
368
|
template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
|
371
|
-
size_t kll_sketch<T, C,
|
|
369
|
+
size_t kll_sketch<T, C, A>::get_serialized_size_bytes(const SerDe&) const {
|
|
372
370
|
if (is_empty()) { return EMPTY_SIZE_BYTES; }
|
|
373
371
|
if (num_levels_ == 1 && get_num_retained() == 1) {
|
|
374
372
|
return DATA_START_SINGLE_ITEM + sizeof(TT);
|
|
@@ -378,25 +376,25 @@ size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe&) const {
|
|
|
378
376
|
}
|
|
379
377
|
|
|
380
378
|
// implementation for all other types
|
|
381
|
-
template<typename T, typename C, typename
|
|
379
|
+
template<typename T, typename C, typename A>
|
|
382
380
|
template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
|
383
|
-
size_t kll_sketch<T, C,
|
|
381
|
+
size_t kll_sketch<T, C, A>::get_serialized_size_bytes(const SerDe& sd) const {
|
|
384
382
|
if (is_empty()) { return EMPTY_SIZE_BYTES; }
|
|
385
383
|
if (num_levels_ == 1 && get_num_retained() == 1) {
|
|
386
384
|
return DATA_START_SINGLE_ITEM + sd.size_of_item(items_[levels_[0]]);
|
|
387
385
|
}
|
|
388
386
|
// the last integer in the levels_ array is not serialized because it can be derived
|
|
389
387
|
size_t size = DATA_START + num_levels_ * sizeof(uint32_t);
|
|
390
|
-
size += sd.size_of_item(*
|
|
391
|
-
size += sd.size_of_item(*
|
|
388
|
+
size += sd.size_of_item(*min_item_);
|
|
389
|
+
size += sd.size_of_item(*max_item_);
|
|
392
390
|
for (auto it: *this) size += sd.size_of_item(it.first);
|
|
393
391
|
return size;
|
|
394
392
|
}
|
|
395
393
|
|
|
396
394
|
// implementation for fixed-size arithmetic types (integral and floating point)
|
|
397
|
-
template<typename T, typename C, typename
|
|
395
|
+
template<typename T, typename C, typename A>
|
|
398
396
|
template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
|
399
|
-
size_t kll_sketch<T, C,
|
|
397
|
+
size_t kll_sketch<T, C, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n) {
|
|
400
398
|
const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
|
|
401
399
|
const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
|
|
402
400
|
// the last integer in the levels_ array is not serialized because it can be derived
|
|
@@ -404,18 +402,18 @@ size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_
|
|
|
404
402
|
}
|
|
405
403
|
|
|
406
404
|
// implementation for all other types
|
|
407
|
-
template<typename T, typename C, typename
|
|
405
|
+
template<typename T, typename C, typename A>
|
|
408
406
|
template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
|
409
|
-
size_t kll_sketch<T, C,
|
|
407
|
+
size_t kll_sketch<T, C, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes) {
|
|
410
408
|
const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
|
|
411
409
|
const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
|
|
412
410
|
// the last integer in the levels_ array is not serialized because it can be derived
|
|
413
411
|
return DATA_START + num_levels * sizeof(uint32_t) + (max_num_retained + 2) * max_item_size_bytes;
|
|
414
412
|
}
|
|
415
413
|
|
|
416
|
-
template<typename T, typename C, typename
|
|
414
|
+
template<typename T, typename C, typename A>
|
|
417
415
|
template<typename SerDe>
|
|
418
|
-
void kll_sketch<T, C,
|
|
416
|
+
void kll_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
419
417
|
const bool is_single_item = n_ == 1;
|
|
420
418
|
const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
|
|
421
419
|
write(os, preamble_ints);
|
|
@@ -440,18 +438,18 @@ void kll_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const
|
|
|
440
438
|
write(os, num_levels_);
|
|
441
439
|
write(os, unused);
|
|
442
440
|
write(os, levels_.data(), sizeof(levels_[0]) * num_levels_);
|
|
443
|
-
sd.serialize(os,
|
|
444
|
-
sd.serialize(os,
|
|
441
|
+
sd.serialize(os, min_item_, 1);
|
|
442
|
+
sd.serialize(os, max_item_, 1);
|
|
445
443
|
}
|
|
446
444
|
sd.serialize(os, &items_[levels_[0]], get_num_retained());
|
|
447
445
|
}
|
|
448
446
|
|
|
449
|
-
template<typename T, typename C, typename
|
|
447
|
+
template<typename T, typename C, typename A>
|
|
450
448
|
template<typename SerDe>
|
|
451
|
-
|
|
449
|
+
auto kll_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
|
|
452
450
|
const bool is_single_item = n_ == 1;
|
|
453
451
|
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
|
|
454
|
-
|
|
452
|
+
vector_bytes bytes(size, 0, allocator_);
|
|
455
453
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
456
454
|
const uint8_t* end_ptr = ptr + size;
|
|
457
455
|
const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
|
|
@@ -476,25 +474,22 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const
|
|
|
476
474
|
ptr += copy_to_mem(num_levels_, ptr);
|
|
477
475
|
ptr += sizeof(uint8_t); // unused
|
|
478
476
|
ptr += copy_to_mem(levels_.data(), ptr, sizeof(levels_[0]) * num_levels_);
|
|
479
|
-
ptr += sd.serialize(ptr, end_ptr - ptr,
|
|
480
|
-
ptr += sd.serialize(ptr, end_ptr - ptr,
|
|
477
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, min_item_, 1);
|
|
478
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, max_item_, 1);
|
|
481
479
|
}
|
|
482
480
|
const size_t bytes_remaining = end_ptr - ptr;
|
|
483
481
|
ptr += sd.serialize(ptr, bytes_remaining, &items_[levels_[0]], get_num_retained());
|
|
484
482
|
}
|
|
485
483
|
const size_t delta = ptr - bytes.data();
|
|
486
|
-
if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta)
|
|
484
|
+
if (delta != size) throw std::logic_error("serialized size mismatch: " + std::to_string(delta)
|
|
485
|
+
+ " != " + std::to_string(size));
|
|
487
486
|
return bytes;
|
|
488
487
|
}
|
|
489
488
|
|
|
490
|
-
template<typename T, typename C, typename
|
|
491
|
-
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
|
|
492
|
-
return deserialize(is, S(), allocator);
|
|
493
|
-
}
|
|
494
|
-
|
|
495
|
-
template<typename T, typename C, typename S, typename A>
|
|
489
|
+
template<typename T, typename C, typename A>
|
|
496
490
|
template<typename SerDe>
|
|
497
|
-
kll_sketch<T, C,
|
|
491
|
+
kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(std::istream& is, const SerDe& sd,
|
|
492
|
+
const C& comparator, const A& allocator) {
|
|
498
493
|
const auto preamble_ints = read<uint8_t>(is);
|
|
499
494
|
const auto serial_version = read<uint8_t>(is);
|
|
500
495
|
const auto family_id = read<uint8_t>(is);
|
|
@@ -510,7 +505,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
|
510
505
|
|
|
511
506
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
512
507
|
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
|
|
513
|
-
if (is_empty) return kll_sketch(k, allocator);
|
|
508
|
+
if (is_empty) return kll_sketch(k, comparator, allocator);
|
|
514
509
|
|
|
515
510
|
uint64_t n;
|
|
516
511
|
uint16_t min_k;
|
|
@@ -526,7 +521,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
|
526
521
|
num_levels = read<uint8_t>(is);
|
|
527
522
|
read<uint8_t>(is); // skip unused byte
|
|
528
523
|
}
|
|
529
|
-
vector_u32
|
|
524
|
+
vector_u32 levels(num_levels + 1, 0, allocator);
|
|
530
525
|
const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
|
|
531
526
|
if (is_single_item) {
|
|
532
527
|
levels[0] = capacity - 1;
|
|
@@ -537,17 +532,17 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
|
537
532
|
levels[num_levels] = capacity;
|
|
538
533
|
A alloc(allocator);
|
|
539
534
|
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
|
540
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
541
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
542
|
-
std::unique_ptr<T, item_deleter>
|
|
543
|
-
std::unique_ptr<T, item_deleter>
|
|
535
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
536
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
537
|
+
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
|
538
|
+
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
|
544
539
|
if (!is_single_item) {
|
|
545
|
-
sd.deserialize(is,
|
|
540
|
+
sd.deserialize(is, min_item_buffer.get(), 1);
|
|
546
541
|
// serde call did not throw, repackage with destrtuctor
|
|
547
|
-
|
|
548
|
-
sd.deserialize(is,
|
|
542
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
543
|
+
sd.deserialize(is, max_item_buffer.get(), 1);
|
|
549
544
|
// serde call did not throw, repackage with destrtuctor
|
|
550
|
-
|
|
545
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
551
546
|
}
|
|
552
547
|
auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
|
|
553
548
|
std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
|
|
@@ -557,27 +552,23 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
|
|
|
557
552
|
std::unique_ptr<T, items_deleter> items(items_buffer.release(), items_deleter(levels[0], capacity, allocator));
|
|
558
553
|
const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
|
|
559
554
|
if (is_single_item) {
|
|
560
|
-
new (
|
|
555
|
+
new (min_item_buffer.get()) T(items.get()[levels[0]]);
|
|
561
556
|
// copy did not throw, repackage with destrtuctor
|
|
562
|
-
|
|
563
|
-
new (
|
|
557
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
558
|
+
new (max_item_buffer.get()) T(items.get()[levels[0]]);
|
|
564
559
|
// copy did not throw, repackage with destrtuctor
|
|
565
|
-
|
|
560
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
566
561
|
}
|
|
567
562
|
if (!is.good())
|
|
568
563
|
throw std::runtime_error("error reading from std::istream");
|
|
569
564
|
return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
|
|
570
|
-
std::move(
|
|
571
|
-
}
|
|
572
|
-
|
|
573
|
-
template<typename T, typename C, typename S, typename A>
|
|
574
|
-
kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
|
575
|
-
return deserialize(bytes, size, S(), allocator);
|
|
565
|
+
std::move(min_item), std::move(max_item), is_level_zero_sorted, comparator);
|
|
576
566
|
}
|
|
577
567
|
|
|
578
|
-
template<typename T, typename C, typename
|
|
568
|
+
template<typename T, typename C, typename A>
|
|
579
569
|
template<typename SerDe>
|
|
580
|
-
kll_sketch<T, C,
|
|
570
|
+
kll_sketch<T, C, A> kll_sketch<T, C, A>::deserialize(const void* bytes, size_t size, const SerDe& sd,
|
|
571
|
+
const C& comparator, const A& allocator) {
|
|
581
572
|
ensure_minimum_memory(size, 8);
|
|
582
573
|
const char* ptr = static_cast<const char*>(bytes);
|
|
583
574
|
uint8_t preamble_ints;
|
|
@@ -601,7 +592,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
|
601
592
|
ensure_minimum_memory(size, preamble_ints * sizeof(uint32_t));
|
|
602
593
|
|
|
603
594
|
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
|
|
604
|
-
if (is_empty) return kll_sketch
|
|
595
|
+
if (is_empty) return kll_sketch(k, comparator, allocator);
|
|
605
596
|
|
|
606
597
|
uint64_t n;
|
|
607
598
|
uint16_t min_k;
|
|
@@ -618,7 +609,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
|
618
609
|
ptr += copy_from_mem(ptr, num_levels);
|
|
619
610
|
ptr += sizeof(uint8_t); // skip unused byte
|
|
620
611
|
}
|
|
621
|
-
vector_u32
|
|
612
|
+
vector_u32 levels(num_levels + 1, 0, allocator);
|
|
622
613
|
const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
|
|
623
614
|
if (is_single_item) {
|
|
624
615
|
levels[0] = capacity - 1;
|
|
@@ -629,17 +620,17 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
|
629
620
|
levels[num_levels] = capacity;
|
|
630
621
|
A alloc(allocator);
|
|
631
622
|
auto item_buffer_deleter = [&alloc](T* ptr) { alloc.deallocate(ptr, 1); };
|
|
632
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
633
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)>
|
|
634
|
-
std::unique_ptr<T, item_deleter>
|
|
635
|
-
std::unique_ptr<T, item_deleter>
|
|
623
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> min_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
624
|
+
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
625
|
+
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
|
626
|
+
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
|
636
627
|
if (!is_single_item) {
|
|
637
|
-
ptr += sd.deserialize(ptr, end_ptr - ptr,
|
|
628
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, min_item_buffer.get(), 1);
|
|
638
629
|
// serde call did not throw, repackage with destrtuctor
|
|
639
|
-
|
|
640
|
-
ptr += sd.deserialize(ptr, end_ptr - ptr,
|
|
630
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
631
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, max_item_buffer.get(), 1);
|
|
641
632
|
// serde call did not throw, repackage with destrtuctor
|
|
642
|
-
|
|
633
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
643
634
|
}
|
|
644
635
|
auto items_buffer_deleter = [capacity, &alloc](T* ptr) { alloc.deallocate(ptr, capacity); };
|
|
645
636
|
std::unique_ptr<T, decltype(items_buffer_deleter)> items_buffer(alloc.allocate(capacity), items_buffer_deleter);
|
|
@@ -651,15 +642,15 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
|
651
642
|
if (delta != size) throw std::logic_error("deserialized size mismatch: " + std::to_string(delta) + " != " + std::to_string(size));
|
|
652
643
|
const bool is_level_zero_sorted = (flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED)) > 0;
|
|
653
644
|
if (is_single_item) {
|
|
654
|
-
new (
|
|
645
|
+
new (min_item_buffer.get()) T(items.get()[levels[0]]);
|
|
655
646
|
// copy did not throw, repackage with destrtuctor
|
|
656
|
-
|
|
657
|
-
new (
|
|
647
|
+
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
648
|
+
new (max_item_buffer.get()) T(items.get()[levels[0]]);
|
|
658
649
|
// copy did not throw, repackage with destrtuctor
|
|
659
|
-
|
|
650
|
+
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
660
651
|
}
|
|
661
652
|
return kll_sketch(k, min_k, n, num_levels, std::move(levels), std::move(items), capacity,
|
|
662
|
-
std::move(
|
|
653
|
+
std::move(min_item), std::move(max_item), is_level_zero_sorted, comparator);
|
|
663
654
|
}
|
|
664
655
|
|
|
665
656
|
/*
|
|
@@ -669,36 +660,38 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
|
|
|
669
660
|
* Otherwise, it is the "single-sided" normalized rank error for all the other queries.
|
|
670
661
|
* Constants were derived as the best fit to 99 percentile empirically measured max error in thousands of trials
|
|
671
662
|
*/
|
|
672
|
-
template<typename T, typename C, typename
|
|
673
|
-
double kll_sketch<T, C,
|
|
663
|
+
template<typename T, typename C, typename A>
|
|
664
|
+
double kll_sketch<T, C, A>::get_normalized_rank_error(uint16_t k, bool pmf) {
|
|
674
665
|
return pmf
|
|
675
666
|
? 2.446 / pow(k, 0.9433)
|
|
676
667
|
: 2.296 / pow(k, 0.9723);
|
|
677
668
|
}
|
|
678
669
|
|
|
679
670
|
// for deserialization
|
|
680
|
-
template<typename T, typename C, typename
|
|
681
|
-
kll_sketch<T, C,
|
|
682
|
-
std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter>
|
|
683
|
-
std::unique_ptr<T, item_deleter>
|
|
671
|
+
template<typename T, typename C, typename A>
|
|
672
|
+
kll_sketch<T, C, A>::kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32&& levels,
|
|
673
|
+
std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_item,
|
|
674
|
+
std::unique_ptr<T, item_deleter> max_item, bool is_level_zero_sorted, const C& comparator):
|
|
675
|
+
comparator_(comparator),
|
|
684
676
|
allocator_(levels.get_allocator()),
|
|
685
677
|
k_(k),
|
|
686
678
|
m_(DEFAULT_M),
|
|
687
679
|
min_k_(min_k),
|
|
688
|
-
n_(n),
|
|
689
680
|
num_levels_(num_levels),
|
|
681
|
+
is_level_zero_sorted_(is_level_zero_sorted),
|
|
682
|
+
n_(n),
|
|
690
683
|
levels_(std::move(levels)),
|
|
691
684
|
items_(items.release()),
|
|
692
685
|
items_size_(items_size),
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
686
|
+
min_item_(min_item.release()),
|
|
687
|
+
max_item_(max_item.release()),
|
|
688
|
+
sorted_view_(nullptr)
|
|
696
689
|
{}
|
|
697
690
|
|
|
698
691
|
// The following code is only valid in the special case of exactly reaching capacity while updating.
|
|
699
692
|
// It cannot be used while merging, while reducing k, or anything else.
|
|
700
|
-
template<typename T, typename C, typename
|
|
701
|
-
void kll_sketch<T, C,
|
|
693
|
+
template<typename T, typename C, typename A>
|
|
694
|
+
void kll_sketch<T, C, A>::compress_while_updating(void) {
|
|
702
695
|
const uint8_t level = find_level_to_compact();
|
|
703
696
|
|
|
704
697
|
// It is important to add the new top level right here. Be aware that this operation
|
|
@@ -722,7 +715,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
|
|
|
722
715
|
// level zero might not be sorted, so we must sort it if we wish to compact it
|
|
723
716
|
// sort_level_zero() is not used here because of the adjustment for odd number of items
|
|
724
717
|
if ((level == 0) && !is_level_zero_sorted_) {
|
|
725
|
-
std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop,
|
|
718
|
+
std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, comparator_);
|
|
726
719
|
}
|
|
727
720
|
if (pop_above == 0) {
|
|
728
721
|
kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
|
|
@@ -751,8 +744,8 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
|
|
|
751
744
|
for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
|
|
752
745
|
}
|
|
753
746
|
|
|
754
|
-
template<typename T, typename C, typename
|
|
755
|
-
uint8_t kll_sketch<T, C,
|
|
747
|
+
template<typename T, typename C, typename A>
|
|
748
|
+
uint8_t kll_sketch<T, C, A>::find_level_to_compact() const {
|
|
756
749
|
uint8_t level = 0;
|
|
757
750
|
while (true) {
|
|
758
751
|
if (level >= num_levels_) throw std::logic_error("capacity calculation error");
|
|
@@ -765,8 +758,8 @@ uint8_t kll_sketch<T, C, S, A>::find_level_to_compact() const {
|
|
|
765
758
|
}
|
|
766
759
|
}
|
|
767
760
|
|
|
768
|
-
template<typename T, typename C, typename
|
|
769
|
-
void kll_sketch<T, C,
|
|
761
|
+
template<typename T, typename C, typename A>
|
|
762
|
+
void kll_sketch<T, C, A>::add_empty_top_level_to_completely_full_sketch() {
|
|
770
763
|
const uint32_t cur_total_cap = levels_[num_levels_];
|
|
771
764
|
|
|
772
765
|
// make sure that we are following a certain growth scheme
|
|
@@ -800,124 +793,50 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
|
|
|
800
793
|
levels_[num_levels_] = new_total_cap; // initialize the new "extra" index at the top
|
|
801
794
|
}
|
|
802
795
|
|
|
803
|
-
template<typename T, typename C, typename
|
|
804
|
-
void kll_sketch<T, C,
|
|
796
|
+
template<typename T, typename C, typename A>
|
|
797
|
+
void kll_sketch<T, C, A>::sort_level_zero() {
|
|
805
798
|
if (!is_level_zero_sorted_) {
|
|
806
|
-
std::sort(items_ + levels_[0], items_ + levels_[1],
|
|
799
|
+
std::sort(items_ + levels_[0], items_ + levels_[1], comparator_);
|
|
807
800
|
is_level_zero_sorted_ = true;
|
|
808
801
|
}
|
|
809
802
|
}
|
|
810
803
|
|
|
811
|
-
template<typename T, typename C, typename
|
|
812
|
-
void kll_sketch<T, C,
|
|
804
|
+
template<typename T, typename C, typename A>
|
|
805
|
+
void kll_sketch<T, C, A>::check_sorting() const {
|
|
813
806
|
// not checking level 0
|
|
814
807
|
for (uint8_t level = 1; level < num_levels_; ++level) {
|
|
815
808
|
const auto from = items_ + levels_[level];
|
|
816
809
|
const auto to = items_ + levels_[level + 1];
|
|
817
|
-
if (!std::is_sorted(from, to,
|
|
810
|
+
if (!std::is_sorted(from, to, comparator_)) {
|
|
818
811
|
throw std::logic_error("levels must be sorted");
|
|
819
812
|
}
|
|
820
813
|
}
|
|
821
814
|
}
|
|
822
815
|
|
|
823
|
-
template<typename T, typename C, typename
|
|
824
|
-
|
|
825
|
-
quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
|
|
816
|
+
template<typename T, typename C, typename A>
|
|
817
|
+
quantiles_sorted_view<T, C, A> kll_sketch<T, C, A>::get_sorted_view() const {
|
|
826
818
|
const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
|
|
827
|
-
|
|
819
|
+
quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
|
|
828
820
|
for (uint8_t level = 0; level < num_levels_; ++level) {
|
|
829
821
|
const auto from = items_ + levels_[level];
|
|
830
822
|
const auto to = items_ + levels_[level + 1]; // exclusive
|
|
831
823
|
view.add(from, to, 1 << level);
|
|
832
824
|
}
|
|
833
|
-
|
|
825
|
+
view.convert_to_cummulative();
|
|
834
826
|
return view;
|
|
835
827
|
}
|
|
836
828
|
|
|
837
|
-
template<typename T, typename C, typename
|
|
838
|
-
template<bool inclusive>
|
|
839
|
-
vector_d<A> kll_sketch<T, C, S, A>::get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const {
|
|
840
|
-
if (is_empty()) return vector_d<A>(allocator_);
|
|
841
|
-
kll_helper::validate_values<T, C>(split_points, size);
|
|
842
|
-
vector_d<A> buckets(size + 1, 0, allocator_);
|
|
843
|
-
uint8_t level = 0;
|
|
844
|
-
uint64_t weight = 1;
|
|
845
|
-
while (level < num_levels_) {
|
|
846
|
-
const auto from_index = levels_[level];
|
|
847
|
-
const auto to_index = levels_[level + 1]; // exclusive
|
|
848
|
-
if ((level == 0) && !is_level_zero_sorted_) {
|
|
849
|
-
increment_buckets_unsorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
|
|
850
|
-
} else {
|
|
851
|
-
increment_buckets_sorted_level<inclusive>(from_index, to_index, weight, split_points, size, buckets.data());
|
|
852
|
-
}
|
|
853
|
-
level++;
|
|
854
|
-
weight *= 2;
|
|
855
|
-
}
|
|
856
|
-
// normalize and, if CDF, convert to cumulative
|
|
857
|
-
if (is_CDF) {
|
|
858
|
-
double subtotal = 0;
|
|
859
|
-
for (uint32_t i = 0; i <= size; i++) {
|
|
860
|
-
subtotal += buckets[i];
|
|
861
|
-
buckets[i] = subtotal / n_;
|
|
862
|
-
}
|
|
863
|
-
} else {
|
|
864
|
-
for (uint32_t i = 0; i <= size; i++) {
|
|
865
|
-
buckets[i] /= n_;
|
|
866
|
-
}
|
|
867
|
-
}
|
|
868
|
-
return buckets;
|
|
869
|
-
}
|
|
870
|
-
|
|
871
|
-
template<typename T, typename C, typename S, typename A>
|
|
872
|
-
template<bool inclusive>
|
|
873
|
-
void kll_sketch<T, C, S, A>::increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
|
874
|
-
const T* split_points, uint32_t size, double* buckets) const
|
|
875
|
-
{
|
|
876
|
-
for (uint32_t i = from_index; i < to_index; i++) {
|
|
877
|
-
uint32_t j;
|
|
878
|
-
for (j = 0; j < size; j++) {
|
|
879
|
-
if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
|
|
880
|
-
break;
|
|
881
|
-
}
|
|
882
|
-
}
|
|
883
|
-
buckets[j] += weight;
|
|
884
|
-
}
|
|
885
|
-
}
|
|
886
|
-
|
|
887
|
-
template<typename T, typename C, typename S, typename A>
|
|
888
|
-
template<bool inclusive>
|
|
889
|
-
void kll_sketch<T, C, S, A>::increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
|
890
|
-
const T* split_points, uint32_t size, double* buckets) const
|
|
891
|
-
{
|
|
892
|
-
uint32_t i = from_index;
|
|
893
|
-
uint32_t j = 0;
|
|
894
|
-
while ((i < to_index) && (j < size)) {
|
|
895
|
-
if (inclusive ? !C()(split_points[j], items_[i]) : C()(items_[i], split_points[j])) {
|
|
896
|
-
buckets[j] += weight; // this sample goes into this bucket
|
|
897
|
-
i++; // move on to next sample and see whether it also goes into this bucket
|
|
898
|
-
} else {
|
|
899
|
-
j++; // no more samples for this bucket
|
|
900
|
-
}
|
|
901
|
-
}
|
|
902
|
-
// now either i == to_index (we are out of samples), or
|
|
903
|
-
// j == size (we are out of buckets, but there are more samples remaining)
|
|
904
|
-
// we only need to do something in the latter case
|
|
905
|
-
if (j == size) {
|
|
906
|
-
buckets[j] += weight * (to_index - i);
|
|
907
|
-
}
|
|
908
|
-
}
|
|
909
|
-
|
|
910
|
-
template<typename T, typename C, typename S, typename A>
|
|
829
|
+
template<typename T, typename C, typename A>
|
|
911
830
|
template<typename O>
|
|
912
|
-
void kll_sketch<T, C,
|
|
831
|
+
void kll_sketch<T, C, A>::merge_higher_levels(O&& other, uint64_t final_n) {
|
|
913
832
|
const uint32_t tmp_num_items = get_num_retained() + other.get_num_retained_above_level_zero();
|
|
914
833
|
A alloc(allocator_);
|
|
915
834
|
auto tmp_items_deleter = [tmp_num_items, &alloc](T* ptr) { alloc.deallocate(ptr, tmp_num_items); }; // no destructor needed
|
|
916
835
|
const std::unique_ptr<T, decltype(tmp_items_deleter)> workbuf(allocator_.allocate(tmp_num_items), tmp_items_deleter);
|
|
917
836
|
const uint8_t ub = kll_helper::ub_on_num_levels(final_n);
|
|
918
837
|
const size_t work_levels_size = ub + 2; // ub+1 does not work
|
|
919
|
-
vector_u32
|
|
920
|
-
vector_u32
|
|
838
|
+
vector_u32 worklevels(work_levels_size, 0, allocator_);
|
|
839
|
+
vector_u32 outlevels(work_levels_size, 0, allocator_);
|
|
921
840
|
|
|
922
841
|
const uint8_t provisional_num_levels = std::max(num_levels_, other.num_levels_);
|
|
923
842
|
|
|
@@ -950,9 +869,9 @@ void kll_sketch<T, C, S, A>::merge_higher_levels(O&& other, uint64_t final_n) {
|
|
|
950
869
|
}
|
|
951
870
|
|
|
952
871
|
// this leaves items_ uninitialized (all objects moved out and destroyed)
|
|
953
|
-
template<typename T, typename C, typename
|
|
872
|
+
template<typename T, typename C, typename A>
|
|
954
873
|
template<typename FwdSk>
|
|
955
|
-
void kll_sketch<T, C,
|
|
874
|
+
void kll_sketch<T, C, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uint32_t* worklevels, uint8_t provisional_num_levels) {
|
|
956
875
|
worklevels[0] = 0;
|
|
957
876
|
|
|
958
877
|
// the level zero data from "other" was already inserted into "this"
|
|
@@ -976,36 +895,36 @@ void kll_sketch<T, C, S, A>::populate_work_arrays(FwdSk&& other, T* workbuf, uin
|
|
|
976
895
|
}
|
|
977
896
|
}
|
|
978
897
|
|
|
979
|
-
template<typename T, typename C, typename
|
|
980
|
-
void kll_sketch<T, C,
|
|
898
|
+
template<typename T, typename C, typename A>
|
|
899
|
+
void kll_sketch<T, C, A>::assert_correct_total_weight() const {
|
|
981
900
|
const uint64_t total(kll_helper::sum_the_sample_weights(num_levels_, levels_.data()));
|
|
982
901
|
if (total != n_) {
|
|
983
902
|
throw std::logic_error("Total weight does not match N");
|
|
984
903
|
}
|
|
985
904
|
}
|
|
986
905
|
|
|
987
|
-
template<typename T, typename C, typename
|
|
988
|
-
uint32_t kll_sketch<T, C,
|
|
906
|
+
template<typename T, typename C, typename A>
|
|
907
|
+
uint32_t kll_sketch<T, C, A>::safe_level_size(uint8_t level) const {
|
|
989
908
|
if (level >= num_levels_) return 0;
|
|
990
909
|
return levels_[level + 1] - levels_[level];
|
|
991
910
|
}
|
|
992
911
|
|
|
993
|
-
template<typename T, typename C, typename
|
|
994
|
-
uint32_t kll_sketch<T, C,
|
|
912
|
+
template<typename T, typename C, typename A>
|
|
913
|
+
uint32_t kll_sketch<T, C, A>::get_num_retained_above_level_zero() const {
|
|
995
914
|
if (num_levels_ == 1) return 0;
|
|
996
915
|
return levels_[num_levels_] - levels_[1];
|
|
997
916
|
}
|
|
998
917
|
|
|
999
|
-
template<typename T, typename C, typename
|
|
1000
|
-
void kll_sketch<T, C,
|
|
918
|
+
template<typename T, typename C, typename A>
|
|
919
|
+
void kll_sketch<T, C, A>::check_m(uint8_t m) {
|
|
1001
920
|
if (m != DEFAULT_M) {
|
|
1002
921
|
throw std::invalid_argument("Possible corruption: M must be " + std::to_string(DEFAULT_M)
|
|
1003
922
|
+ ": " + std::to_string(m));
|
|
1004
923
|
}
|
|
1005
924
|
}
|
|
1006
925
|
|
|
1007
|
-
template<typename T, typename C, typename
|
|
1008
|
-
void kll_sketch<T, C,
|
|
926
|
+
template<typename T, typename C, typename A>
|
|
927
|
+
void kll_sketch<T, C, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t flags_byte) {
|
|
1009
928
|
const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
|
|
1010
929
|
const bool is_single_item(flags_byte & (1 << flags::IS_SINGLE_ITEM));
|
|
1011
930
|
if (is_empty || is_single_item) {
|
|
@@ -1021,8 +940,8 @@ void kll_sketch<T, C, S, A>::check_preamble_ints(uint8_t preamble_ints, uint8_t
|
|
|
1021
940
|
}
|
|
1022
941
|
}
|
|
1023
942
|
|
|
1024
|
-
template<typename T, typename C, typename
|
|
1025
|
-
void kll_sketch<T, C,
|
|
943
|
+
template<typename T, typename C, typename A>
|
|
944
|
+
void kll_sketch<T, C, A>::check_serial_version(uint8_t serial_version) {
|
|
1026
945
|
if (serial_version != SERIAL_VERSION_1 && serial_version != SERIAL_VERSION_2) {
|
|
1027
946
|
throw std::invalid_argument("Possible corruption: serial version mismatch: expected "
|
|
1028
947
|
+ std::to_string(SERIAL_VERSION_1) + " or " + std::to_string(SERIAL_VERSION_2)
|
|
@@ -1030,16 +949,16 @@ void kll_sketch<T, C, S, A>::check_serial_version(uint8_t serial_version) {
|
|
|
1030
949
|
}
|
|
1031
950
|
}
|
|
1032
951
|
|
|
1033
|
-
template<typename T, typename C, typename
|
|
1034
|
-
void kll_sketch<T, C,
|
|
952
|
+
template<typename T, typename C, typename A>
|
|
953
|
+
void kll_sketch<T, C, A>::check_family_id(uint8_t family_id) {
|
|
1035
954
|
if (family_id != FAMILY) {
|
|
1036
955
|
throw std::invalid_argument("Possible corruption: family mismatch: expected "
|
|
1037
956
|
+ std::to_string(FAMILY) + ", got " + std::to_string(family_id));
|
|
1038
957
|
}
|
|
1039
958
|
}
|
|
1040
959
|
|
|
1041
|
-
template <typename T, typename C, typename
|
|
1042
|
-
string<A> kll_sketch<T, C,
|
|
960
|
+
template <typename T, typename C, typename A>
|
|
961
|
+
string<A> kll_sketch<T, C, A>::to_string(bool print_levels, bool print_items) const {
|
|
1043
962
|
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
1044
963
|
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
1045
964
|
std::ostringstream os;
|
|
@@ -1057,8 +976,8 @@ string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
|
|
|
1057
976
|
os << " Capacity items : " << items_size_ << std::endl;
|
|
1058
977
|
os << " Retained items : " << get_num_retained() << std::endl;
|
|
1059
978
|
if (!is_empty()) {
|
|
1060
|
-
os << " Min
|
|
1061
|
-
os << " Max
|
|
979
|
+
os << " Min item : " << *min_item_ << std::endl;
|
|
980
|
+
os << " Max item : " << *max_item_ << std::endl;
|
|
1062
981
|
}
|
|
1063
982
|
os << "### End sketch summary" << std::endl;
|
|
1064
983
|
|
|
@@ -1090,25 +1009,74 @@ string<A> kll_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
|
|
|
1090
1009
|
return string<A>(os.str().c_str(), allocator_);
|
|
1091
1010
|
}
|
|
1092
1011
|
|
|
1093
|
-
template <typename T, typename C, typename
|
|
1094
|
-
typename kll_sketch<T, C,
|
|
1095
|
-
return kll_sketch<T, C,
|
|
1012
|
+
template <typename T, typename C, typename A>
|
|
1013
|
+
typename kll_sketch<T, C, A>::const_iterator kll_sketch<T, C, A>::begin() const {
|
|
1014
|
+
return kll_sketch<T, C, A>::const_iterator(items_, levels_.data(), num_levels_);
|
|
1015
|
+
}
|
|
1016
|
+
|
|
1017
|
+
template <typename T, typename C, typename A>
|
|
1018
|
+
typename kll_sketch<T, C, A>::const_iterator kll_sketch<T, C, A>::end() const {
|
|
1019
|
+
return kll_sketch<T, C, A>::const_iterator(nullptr, levels_.data(), num_levels_);
|
|
1020
|
+
}
|
|
1021
|
+
|
|
1022
|
+
template<typename T, typename C, typename A>
|
|
1023
|
+
class kll_sketch<T, C, A>::item_deleter {
|
|
1024
|
+
public:
|
|
1025
|
+
item_deleter(const A& allocator): allocator_(allocator) {}
|
|
1026
|
+
void operator() (T* ptr) {
|
|
1027
|
+
if (ptr != nullptr) {
|
|
1028
|
+
ptr->~T();
|
|
1029
|
+
allocator_.deallocate(ptr, 1);
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
private:
|
|
1033
|
+
A allocator_;
|
|
1034
|
+
};
|
|
1035
|
+
|
|
1036
|
+
template<typename T, typename C, typename A>
|
|
1037
|
+
class kll_sketch<T, C, A>::items_deleter {
|
|
1038
|
+
public:
|
|
1039
|
+
items_deleter(uint32_t start, uint32_t num, const A& allocator):
|
|
1040
|
+
allocator_(allocator), start_(start), num_(num) {}
|
|
1041
|
+
void operator() (T* ptr) {
|
|
1042
|
+
if (ptr != nullptr) {
|
|
1043
|
+
for (uint32_t i = start_; i < num_; ++i) ptr[i].~T();
|
|
1044
|
+
allocator_.deallocate(ptr, num_);
|
|
1045
|
+
}
|
|
1046
|
+
}
|
|
1047
|
+
private:
|
|
1048
|
+
A allocator_;
|
|
1049
|
+
uint32_t start_;
|
|
1050
|
+
uint32_t num_;
|
|
1051
|
+
};
|
|
1052
|
+
|
|
1053
|
+
template<typename T, typename C, typename A>
|
|
1054
|
+
void kll_sketch<T, C, A>::setup_sorted_view() const {
|
|
1055
|
+
if (sorted_view_ == nullptr) {
|
|
1056
|
+
using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
|
|
1057
|
+
sorted_view_ = new (AllocSortedView(allocator_).allocate(1)) quantiles_sorted_view<T, C, A>(get_sorted_view());
|
|
1058
|
+
}
|
|
1096
1059
|
}
|
|
1097
1060
|
|
|
1098
|
-
template
|
|
1099
|
-
|
|
1100
|
-
|
|
1061
|
+
template<typename T, typename C, typename A>
|
|
1062
|
+
void kll_sketch<T, C, A>::reset_sorted_view() {
|
|
1063
|
+
if (sorted_view_ != nullptr) {
|
|
1064
|
+
sorted_view_->~quantiles_sorted_view();
|
|
1065
|
+
using AllocSortedView = typename std::allocator_traits<A>::template rebind_alloc<quantiles_sorted_view<T, C, A>>;
|
|
1066
|
+
AllocSortedView(allocator_).deallocate(sorted_view_, 1);
|
|
1067
|
+
sorted_view_ = nullptr;
|
|
1068
|
+
}
|
|
1101
1069
|
}
|
|
1102
1070
|
|
|
1103
1071
|
// kll_sketch::const_iterator implementation
|
|
1104
1072
|
|
|
1105
|
-
template<typename T, typename C, typename
|
|
1106
|
-
kll_sketch<T, C,
|
|
1073
|
+
template<typename T, typename C, typename A>
|
|
1074
|
+
kll_sketch<T, C, A>::const_iterator::const_iterator(const T* items, const uint32_t* levels, const uint8_t num_levels):
|
|
1107
1075
|
items(items), levels(levels), num_levels(num_levels), index(items == nullptr ? levels[num_levels] : levels[0]), level(items == nullptr ? num_levels : 0), weight(1)
|
|
1108
1076
|
{}
|
|
1109
1077
|
|
|
1110
|
-
template<typename T, typename C, typename
|
|
1111
|
-
typename kll_sketch<T, C,
|
|
1078
|
+
template<typename T, typename C, typename A>
|
|
1079
|
+
typename kll_sketch<T, C, A>::const_iterator& kll_sketch<T, C, A>::const_iterator::operator++() {
|
|
1112
1080
|
++index;
|
|
1113
1081
|
if (index == levels[level + 1]) { // go to the next non-empty level
|
|
1114
1082
|
do {
|
|
@@ -1119,58 +1087,32 @@ typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_i
|
|
|
1119
1087
|
return *this;
|
|
1120
1088
|
}
|
|
1121
1089
|
|
|
1122
|
-
template<typename T, typename C, typename
|
|
1123
|
-
typename kll_sketch<T, C,
|
|
1090
|
+
template<typename T, typename C, typename A>
|
|
1091
|
+
typename kll_sketch<T, C, A>::const_iterator& kll_sketch<T, C, A>::const_iterator::operator++(int) {
|
|
1124
1092
|
const_iterator tmp(*this);
|
|
1125
1093
|
operator++();
|
|
1126
1094
|
return tmp;
|
|
1127
1095
|
}
|
|
1128
1096
|
|
|
1129
|
-
template<typename T, typename C, typename
|
|
1130
|
-
bool kll_sketch<T, C,
|
|
1097
|
+
template<typename T, typename C, typename A>
|
|
1098
|
+
bool kll_sketch<T, C, A>::const_iterator::operator==(const const_iterator& other) const {
|
|
1131
1099
|
return index == other.index;
|
|
1132
1100
|
}
|
|
1133
1101
|
|
|
1134
|
-
template<typename T, typename C, typename
|
|
1135
|
-
bool kll_sketch<T, C,
|
|
1102
|
+
template<typename T, typename C, typename A>
|
|
1103
|
+
bool kll_sketch<T, C, A>::const_iterator::operator!=(const const_iterator& other) const {
|
|
1136
1104
|
return !operator==(other);
|
|
1137
1105
|
}
|
|
1138
1106
|
|
|
1139
|
-
template<typename T, typename C, typename
|
|
1140
|
-
|
|
1141
|
-
return
|
|
1107
|
+
template<typename T, typename C, typename A>
|
|
1108
|
+
auto kll_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
|
|
1109
|
+
return value_type(items[index], weight);
|
|
1142
1110
|
}
|
|
1143
1111
|
|
|
1144
|
-
template<typename T, typename C, typename
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
void operator() (T* ptr) {
|
|
1149
|
-
if (ptr != nullptr) {
|
|
1150
|
-
ptr->~T();
|
|
1151
|
-
allocator_.deallocate(ptr, 1);
|
|
1152
|
-
}
|
|
1153
|
-
}
|
|
1154
|
-
private:
|
|
1155
|
-
A allocator_;
|
|
1156
|
-
};
|
|
1157
|
-
|
|
1158
|
-
template<typename T, typename C, typename S, typename A>
|
|
1159
|
-
class kll_sketch<T, C, S, A>::items_deleter {
|
|
1160
|
-
public:
|
|
1161
|
-
items_deleter(uint32_t start, uint32_t num, const A& allocator):
|
|
1162
|
-
allocator_(allocator), start_(start), num_(num) {}
|
|
1163
|
-
void operator() (T* ptr) {
|
|
1164
|
-
if (ptr != nullptr) {
|
|
1165
|
-
for (uint32_t i = start_; i < num_; ++i) ptr[i].~T();
|
|
1166
|
-
allocator_.deallocate(ptr, num_);
|
|
1167
|
-
}
|
|
1168
|
-
}
|
|
1169
|
-
private:
|
|
1170
|
-
A allocator_;
|
|
1171
|
-
uint32_t start_;
|
|
1172
|
-
uint32_t num_;
|
|
1173
|
-
};
|
|
1112
|
+
template<typename T, typename C, typename A>
|
|
1113
|
+
auto kll_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
|
|
1114
|
+
return **this;
|
|
1115
|
+
}
|
|
1174
1116
|
|
|
1175
1117
|
} /* namespace datasketches */
|
|
1176
1118
|
|