datasketches 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
|
@@ -20,18 +20,17 @@
|
|
|
20
20
|
#ifndef REQ_SKETCH_HPP_
|
|
21
21
|
#define REQ_SKETCH_HPP_
|
|
22
22
|
|
|
23
|
+
#include <iterator>
|
|
24
|
+
|
|
23
25
|
#include "req_common.hpp"
|
|
24
26
|
#include "req_compactor.hpp"
|
|
25
|
-
#include "
|
|
26
|
-
|
|
27
|
-
#include <stdexcept>
|
|
27
|
+
#include "quantiles_sorted_view.hpp"
|
|
28
28
|
|
|
29
29
|
namespace datasketches {
|
|
30
30
|
|
|
31
31
|
template<
|
|
32
32
|
typename T,
|
|
33
33
|
typename Comparator = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
|
|
34
|
-
typename S = serde<T>, // deprecated, to be removed in the next major version
|
|
35
34
|
typename Allocator = std::allocator<T>
|
|
36
35
|
>
|
|
37
36
|
class req_sketch {
|
|
@@ -40,7 +39,6 @@ public:
|
|
|
40
39
|
using comparator = Comparator;
|
|
41
40
|
using Compactor = req_compactor<T, Comparator, Allocator>;
|
|
42
41
|
using AllocCompactor = typename std::allocator_traits<Allocator>::template rebind_alloc<Compactor>;
|
|
43
|
-
using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
|
|
44
42
|
|
|
45
43
|
/**
|
|
46
44
|
* Constructor
|
|
@@ -48,9 +46,11 @@ public:
|
|
|
48
46
|
* Value of 12 roughly corresponds to 1% relative error guarantee at 95% confidence.
|
|
49
47
|
* @param hra if true, the default, the high ranks are prioritized for better
|
|
50
48
|
* accuracy. Otherwise the low ranks are prioritized for better accuracy.
|
|
49
|
+
* @param comparator to use by this instance
|
|
51
50
|
* @param allocator to use by this instance
|
|
52
51
|
*/
|
|
53
|
-
explicit req_sketch(uint16_t k, bool hra = true, const
|
|
52
|
+
explicit req_sketch(uint16_t k, bool hra = true, const Comparator& comparator = Comparator(),
|
|
53
|
+
const Allocator& allocator = Allocator());
|
|
54
54
|
|
|
55
55
|
~req_sketch();
|
|
56
56
|
req_sketch(const req_sketch& other);
|
|
@@ -61,10 +61,12 @@ public:
|
|
|
61
61
|
/*
|
|
62
62
|
* Type converting constructor.
|
|
63
63
|
* @param other sketch of a different type
|
|
64
|
+
* @param comparator instance of a Comparator
|
|
64
65
|
* @param allocator instance of an Allocator
|
|
65
66
|
*/
|
|
66
|
-
template<typename TT, typename CC, typename
|
|
67
|
-
explicit req_sketch(const req_sketch<TT, CC,
|
|
67
|
+
template<typename TT, typename CC, typename AA>
|
|
68
|
+
explicit req_sketch(const req_sketch<TT, CC, AA>& other, const Comparator& comparator = Comparator(),
|
|
69
|
+
const Allocator& allocator = Allocator());
|
|
68
70
|
|
|
69
71
|
/**
|
|
70
72
|
* Returns configured parameter K
|
|
@@ -102,27 +104,33 @@ public:
|
|
|
102
104
|
*/
|
|
103
105
|
bool is_estimation_mode() const;
|
|
104
106
|
|
|
107
|
+
/**
|
|
108
|
+
* Updates this sketch with the given data item.
|
|
109
|
+
* @param item from a stream of items
|
|
110
|
+
*/
|
|
105
111
|
template<typename FwdT>
|
|
106
112
|
void update(FwdT&& item);
|
|
107
113
|
|
|
114
|
+
/**
|
|
115
|
+
* Merges another sketch into this one.
|
|
116
|
+
* @param other sketch to merge into this one
|
|
117
|
+
*/
|
|
108
118
|
template<typename FwdSk>
|
|
109
119
|
void merge(FwdSk&& other);
|
|
110
120
|
|
|
111
121
|
/**
|
|
112
|
-
* Returns the min
|
|
113
|
-
*
|
|
114
|
-
*
|
|
115
|
-
* @return the min value of the stream
|
|
122
|
+
* Returns the min item of the stream.
|
|
123
|
+
* If the sketch is empty this throws std::runtime_error.
|
|
124
|
+
* @return the min item of the stream
|
|
116
125
|
*/
|
|
117
|
-
const T&
|
|
126
|
+
const T& get_min_item() const;
|
|
118
127
|
|
|
119
128
|
/**
|
|
120
|
-
* Returns the max
|
|
121
|
-
*
|
|
122
|
-
*
|
|
123
|
-
* @return the max value of the stream
|
|
129
|
+
* Returns the max item of the stream.
|
|
130
|
+
* If the sketch is empty this throws std::runtime_error.
|
|
131
|
+
* @return the max item of the stream
|
|
124
132
|
*/
|
|
125
|
-
const T&
|
|
133
|
+
const T& get_max_item() const;
|
|
126
134
|
|
|
127
135
|
/**
|
|
128
136
|
* Returns an instance of the comparator for this sketch.
|
|
@@ -131,84 +139,99 @@ public:
|
|
|
131
139
|
Comparator get_comparator() const;
|
|
132
140
|
|
|
133
141
|
/**
|
|
134
|
-
* Returns an
|
|
135
|
-
*
|
|
136
|
-
|
|
142
|
+
* Returns an instance of the allocator for this sketch.
|
|
143
|
+
* @return allocator
|
|
144
|
+
*/
|
|
145
|
+
Allocator get_allocator() const;
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Returns an approximation to the normalized rank of the given item from 0 to 1 inclusive.
|
|
149
|
+
*
|
|
150
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
|
137
151
|
*
|
|
138
|
-
*
|
|
152
|
+
* @param item to be ranked.
|
|
153
|
+
* @param inclusive if true the weight of the given item is included into the rank.
|
|
154
|
+
* Otherwise the rank equals the sum of the weights of all items that are less than the given item
|
|
155
|
+
* according to the comparator C.
|
|
139
156
|
*
|
|
140
|
-
* @param item to be ranked
|
|
141
157
|
* @return an approximate rank of the given item
|
|
142
158
|
*/
|
|
143
|
-
|
|
144
|
-
double get_rank(const T& item) const;
|
|
159
|
+
double get_rank(const T& item, bool inclusive = true) const;
|
|
145
160
|
|
|
146
161
|
/**
|
|
147
162
|
* Returns an approximation to the Probability Mass Function (PMF) of the input stream
|
|
148
|
-
* given a set of split points (
|
|
163
|
+
* given a set of split points (items).
|
|
149
164
|
*
|
|
150
|
-
* <p>If the sketch is empty this
|
|
165
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
|
151
166
|
*
|
|
152
|
-
* @param split_points an array of <i>m</i> unique, monotonically increasing
|
|
153
|
-
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
|
|
154
|
-
*
|
|
155
|
-
*
|
|
156
|
-
*
|
|
157
|
-
*
|
|
158
|
-
*
|
|
167
|
+
* @param split_points an array of <i>m</i> unique, monotonically increasing items
|
|
168
|
+
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
|
|
169
|
+
*
|
|
170
|
+
* @param size the number of split points in the array
|
|
171
|
+
*
|
|
172
|
+
* @param inclusive if true the rank of an item includes its own weight, and therefore
|
|
173
|
+
* if the sketch contains items equal to a split point, then in PMF such items are
|
|
174
|
+
* included into the interval to the left of split point. Otherwise they are included into the interval
|
|
175
|
+
* to the right of split point.
|
|
159
176
|
*
|
|
160
177
|
* @return an array of m+1 doubles each of which is an approximation
|
|
161
|
-
* to the fraction of the input stream
|
|
162
|
-
* If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
|
|
163
|
-
* split point, with the exception that the last interval will include the maximum value.
|
|
164
|
-
* If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
|
|
165
|
-
* split point.
|
|
178
|
+
* to the fraction of the input stream items (the mass) that fall into one of those intervals.
|
|
166
179
|
*/
|
|
167
|
-
|
|
168
|
-
vector_double get_PMF(const T* split_points, uint32_t size) const;
|
|
180
|
+
using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
|
|
181
|
+
vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
|
|
169
182
|
|
|
170
183
|
/**
|
|
171
184
|
* Returns an approximation to the Cumulative Distribution Function (CDF), which is the
|
|
172
|
-
* cumulative analog of the PMF, of the input stream given a set of split points (
|
|
185
|
+
* cumulative analog of the PMF, of the input stream given a set of split points (items).
|
|
173
186
|
*
|
|
174
|
-
* <p>If the sketch is empty this
|
|
187
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
|
175
188
|
*
|
|
176
|
-
* @param split_points an array of <i>m</i> unique, monotonically increasing
|
|
189
|
+
* @param split_points an array of <i>m</i> unique, monotonically increasing items
|
|
177
190
|
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
|
|
178
|
-
* If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
|
|
179
|
-
* split point, with the exception that the last interval will include the maximum value.
|
|
180
|
-
* If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
|
|
181
|
-
* split point.
|
|
182
|
-
* It is not necessary to include either the min or max values in these split points.
|
|
183
191
|
*
|
|
184
|
-
* @
|
|
192
|
+
* @param size the number of split points in the array
|
|
193
|
+
*
|
|
194
|
+
* @param inclusive if true the rank of an item includes its own weight, and therefore
|
|
195
|
+
* if the sketch contains items equal to a split point, then in CDF such items are
|
|
196
|
+
* included into the interval to the left of split point. Otherwise they are included into
|
|
197
|
+
* the interval to the right of split point.
|
|
198
|
+
*
|
|
199
|
+
* @return an array of m+1 doubles, which are a consecutive approximation to the CDF
|
|
185
200
|
* of the input stream given the split_points. The value at array position j of the returned
|
|
186
201
|
* CDF array is the sum of the returned values in positions 0 through j of the returned PMF
|
|
187
|
-
* array.
|
|
202
|
+
* array. This can be viewed as an array of ranks of the given split points plus one more value
|
|
203
|
+
* that is always 1.
|
|
188
204
|
*/
|
|
189
|
-
|
|
190
|
-
vector_double get_CDF(const T* split_points, uint32_t size) const;
|
|
205
|
+
vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
|
|
191
206
|
|
|
192
207
|
/**
|
|
193
208
|
* Returns an approximate quantile of the given normalized rank.
|
|
194
209
|
* The normalized rank must be in the range [0.0, 1.0] (both inclusive).
|
|
195
|
-
*
|
|
196
|
-
*
|
|
210
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
|
211
|
+
*
|
|
212
|
+
* @param rank of an item in the hypothetical sorted stream.
|
|
213
|
+
* @param inclusive if true, the given rank is considered inclusive (includes weight of an item)
|
|
214
|
+
*
|
|
215
|
+
* @return approximate quantile associated with the given rank
|
|
197
216
|
*/
|
|
198
|
-
using quantile_return_type = typename
|
|
199
|
-
|
|
200
|
-
quantile_return_type get_quantile(double rank) const;
|
|
217
|
+
using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
|
|
218
|
+
quantile_return_type get_quantile(double rank, bool inclusive = true) const;
|
|
201
219
|
|
|
202
220
|
/**
|
|
203
221
|
* Returns an array of quantiles that correspond to the given array of normalized ranks.
|
|
222
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
|
223
|
+
*
|
|
204
224
|
* @param ranks given array of normalized ranks.
|
|
225
|
+
* @param size the number of ranks in the array.
|
|
226
|
+
*
|
|
205
227
|
* @return array of quantiles that correspond to the given array of normalized ranks
|
|
228
|
+
*
|
|
229
|
+
* Deprecated. Will be removed in the next major version. Use get_quantile() instead.
|
|
206
230
|
*/
|
|
207
|
-
|
|
208
|
-
std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size) const;
|
|
231
|
+
std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
|
|
209
232
|
|
|
210
233
|
/**
|
|
211
|
-
* Returns an approximate lower bound of the given
|
|
234
|
+
* Returns an approximate lower bound of the given normalized rank.
|
|
212
235
|
* @param rank the given rank, a value between 0 and 1.0.
|
|
213
236
|
* @param num_std_dev the number of standard deviations. Must be 1, 2, or 3.
|
|
214
237
|
* @return an approximate lower bound rank.
|
|
@@ -216,7 +239,7 @@ public:
|
|
|
216
239
|
double get_rank_lower_bound(double rank, uint8_t num_std_dev) const;
|
|
217
240
|
|
|
218
241
|
/**
|
|
219
|
-
* Returns an approximate upper bound of the given
|
|
242
|
+
* Returns an approximate upper bound of the given normalized rank.
|
|
220
243
|
* @param rank the given rank, a value between 0 and 1.0.
|
|
221
244
|
* @param num_std_dev the number of standard deviations. Must be 1, 2, or 3.
|
|
222
245
|
* @return an approximate upper bound rank.
|
|
@@ -239,27 +262,27 @@ public:
|
|
|
239
262
|
/**
|
|
240
263
|
* Computes size needed to serialize the current state of the sketch.
|
|
241
264
|
* This version is for fixed-size arithmetic types (integral and floating point).
|
|
242
|
-
* @param instance of a SerDe
|
|
265
|
+
* @param sd instance of a SerDe
|
|
243
266
|
* @return size in bytes needed to serialize this sketch
|
|
244
267
|
*/
|
|
245
|
-
template<typename TT = T, typename SerDe =
|
|
268
|
+
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
|
246
269
|
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
247
270
|
|
|
248
271
|
/**
|
|
249
272
|
* Computes size needed to serialize the current state of the sketch.
|
|
250
273
|
* This version is for all other types and can be expensive since every item needs to be looked at.
|
|
251
|
-
* @param instance of a SerDe
|
|
274
|
+
* @param sd instance of a SerDe
|
|
252
275
|
* @return size in bytes needed to serialize this sketch
|
|
253
276
|
*/
|
|
254
|
-
template<typename TT = T, typename SerDe =
|
|
277
|
+
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
|
255
278
|
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
256
279
|
|
|
257
280
|
/**
|
|
258
281
|
* This method serializes the sketch into a given stream in a binary form
|
|
259
282
|
* @param os output stream
|
|
260
|
-
* @param instance of a SerDe
|
|
283
|
+
* @param sd instance of a SerDe
|
|
261
284
|
*/
|
|
262
|
-
template<typename SerDe =
|
|
285
|
+
template<typename SerDe = serde<T>>
|
|
263
286
|
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
|
264
287
|
|
|
265
288
|
// This is a convenience alias for users
|
|
@@ -272,52 +295,35 @@ public:
|
|
|
272
295
|
* It is a blank space of a given size.
|
|
273
296
|
* This header is used in Datasketches PostgreSQL extension.
|
|
274
297
|
* @param header_size_bytes space to reserve in front of the sketch
|
|
275
|
-
* @param instance of a SerDe
|
|
298
|
+
* @param sd instance of a SerDe
|
|
276
299
|
*/
|
|
277
|
-
template<typename SerDe =
|
|
300
|
+
template<typename SerDe = serde<T>>
|
|
278
301
|
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
|
279
302
|
|
|
280
303
|
/**
|
|
281
304
|
* This method deserializes a sketch from a given stream.
|
|
282
305
|
* @param is input stream
|
|
283
|
-
* @param instance of
|
|
284
|
-
* @
|
|
285
|
-
*
|
|
286
|
-
* Deprecated, to be removed in the next major version
|
|
287
|
-
*/
|
|
288
|
-
static req_sketch deserialize(std::istream& is, const Allocator& allocator = Allocator());
|
|
289
|
-
|
|
290
|
-
/**
|
|
291
|
-
* This method deserializes a sketch from a given stream.
|
|
292
|
-
* @param is input stream
|
|
293
|
-
* @param instance of a SerDe
|
|
294
|
-
* @param instance of an Allocator
|
|
295
|
-
* @return an instance of a sketch
|
|
296
|
-
*/
|
|
297
|
-
template<typename SerDe = S>
|
|
298
|
-
static req_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
|
|
299
|
-
|
|
300
|
-
/**
|
|
301
|
-
* This method deserializes a sketch from a given array of bytes.
|
|
302
|
-
* @param bytes pointer to the array of bytes
|
|
303
|
-
* @param size the size of the array
|
|
304
|
-
* @param instance of an Allocator
|
|
306
|
+
* @param sd instance of a SerDe
|
|
307
|
+
* @param comparator instance of a Comparator
|
|
308
|
+
* @param allocator instance of an Allocator
|
|
305
309
|
* @return an instance of a sketch
|
|
306
|
-
*
|
|
307
|
-
* Deprecated, to be removed in the next major version
|
|
308
310
|
*/
|
|
309
|
-
|
|
311
|
+
template<typename SerDe = serde<T>>
|
|
312
|
+
static req_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(),
|
|
313
|
+
const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
|
|
310
314
|
|
|
311
315
|
/**
|
|
312
316
|
* This method deserializes a sketch from a given array of bytes.
|
|
313
317
|
* @param bytes pointer to the array of bytes
|
|
314
318
|
* @param size the size of the array
|
|
315
|
-
* @param instance of a SerDe
|
|
316
|
-
* @param instance of
|
|
319
|
+
* @param sd instance of a SerDe
|
|
320
|
+
* @param comparator instance of a Comparator
|
|
321
|
+
* @param allocator instance of an Allocator
|
|
317
322
|
* @return an instance of a sketch
|
|
318
323
|
*/
|
|
319
|
-
template<typename SerDe =
|
|
320
|
-
static req_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(),
|
|
324
|
+
template<typename SerDe = serde<T>>
|
|
325
|
+
static req_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(),
|
|
326
|
+
const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
|
|
321
327
|
|
|
322
328
|
/**
|
|
323
329
|
* Prints a summary of the sketch.
|
|
@@ -330,10 +336,10 @@ public:
|
|
|
330
336
|
const_iterator begin() const;
|
|
331
337
|
const_iterator end() const;
|
|
332
338
|
|
|
333
|
-
|
|
334
|
-
quantile_sketch_sorted_view<T, Comparator, Allocator> get_sorted_view(bool cumulative) const;
|
|
339
|
+
quantiles_sorted_view<T, Comparator, Allocator> get_sorted_view() const;
|
|
335
340
|
|
|
336
341
|
private:
|
|
342
|
+
Comparator comparator_;
|
|
337
343
|
Allocator allocator_;
|
|
338
344
|
uint16_t k_;
|
|
339
345
|
bool hra_;
|
|
@@ -341,8 +347,12 @@ private:
|
|
|
341
347
|
uint32_t num_retained_;
|
|
342
348
|
uint64_t n_;
|
|
343
349
|
std::vector<Compactor, AllocCompactor> compactors_;
|
|
344
|
-
T*
|
|
345
|
-
T*
|
|
350
|
+
T* min_item_;
|
|
351
|
+
T* max_item_;
|
|
352
|
+
mutable quantiles_sorted_view<T, Comparator, Allocator>* sorted_view_;
|
|
353
|
+
|
|
354
|
+
void setup_sorted_view() const; // modifies mutable state
|
|
355
|
+
void reset_sorted_view();
|
|
346
356
|
|
|
347
357
|
static const bool LAZY_COMPRESSION = false;
|
|
348
358
|
|
|
@@ -366,75 +376,44 @@ private:
|
|
|
366
376
|
|
|
367
377
|
// for deserialization
|
|
368
378
|
class item_deleter;
|
|
369
|
-
req_sketch(uint16_t k, bool hra, uint64_t n,
|
|
379
|
+
req_sketch(uint16_t k, bool hra, uint64_t n,
|
|
380
|
+
std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
|
|
381
|
+
std::vector<Compactor, AllocCompactor>&& compactors, const Comparator& comparator);
|
|
370
382
|
|
|
371
383
|
static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);
|
|
372
384
|
static void check_serial_version(uint8_t serial_version);
|
|
373
385
|
static void check_family_id(uint8_t family_id);
|
|
374
386
|
|
|
375
|
-
// implementations for floating point types
|
|
376
387
|
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
|
377
|
-
static const TT&
|
|
378
|
-
|
|
379
|
-
return value;
|
|
388
|
+
static inline bool check_update_item(const TT& item) {
|
|
389
|
+
return !std::isnan(item);
|
|
380
390
|
}
|
|
381
391
|
|
|
382
|
-
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
|
383
|
-
static inline bool check_update_value(const TT& value) {
|
|
384
|
-
return !std::isnan(value);
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
|
388
|
-
static inline void check_split_points(const T* values, uint32_t size) {
|
|
389
|
-
for (uint32_t i = 0; i < size ; i++) {
|
|
390
|
-
if (std::isnan(values[i])) {
|
|
391
|
-
throw std::invalid_argument("Values must not be NaN");
|
|
392
|
-
}
|
|
393
|
-
if ((i < (size - 1)) && !(Comparator()(values[i], values[i + 1]))) {
|
|
394
|
-
throw std::invalid_argument("Values must be unique and monotonically increasing");
|
|
395
|
-
}
|
|
396
|
-
}
|
|
397
|
-
}
|
|
398
|
-
|
|
399
|
-
// implementations for all other types
|
|
400
392
|
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|
|
401
|
-
static const TT&
|
|
402
|
-
throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of values");
|
|
403
|
-
}
|
|
404
|
-
|
|
405
|
-
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|
|
406
|
-
static inline bool check_update_value(const TT&) {
|
|
393
|
+
static inline bool check_update_item(const TT&) {
|
|
407
394
|
return true;
|
|
408
395
|
}
|
|
409
396
|
|
|
410
|
-
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|
|
411
|
-
static inline void check_split_points(const T* values, uint32_t size) {
|
|
412
|
-
for (uint32_t i = 0; i < size ; i++) {
|
|
413
|
-
if ((i < (size - 1)) && !(Comparator()(values[i], values[i + 1]))) {
|
|
414
|
-
throw std::invalid_argument("Values must be unique and monotonically increasing");
|
|
415
|
-
}
|
|
416
|
-
}
|
|
417
|
-
}
|
|
418
|
-
|
|
419
397
|
// for type converting constructor
|
|
420
|
-
template<typename TT, typename CC, typename
|
|
421
|
-
friend class req_sketch;
|
|
398
|
+
template<typename TT, typename CC, typename AA> friend class req_sketch;
|
|
422
399
|
};
|
|
423
400
|
|
|
424
|
-
template<typename T, typename C, typename
|
|
425
|
-
class req_sketch<T, C,
|
|
401
|
+
template<typename T, typename C, typename A>
|
|
402
|
+
class req_sketch<T, C, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
|
|
426
403
|
public:
|
|
404
|
+
using value_type = std::pair<const T&, const uint64_t>;
|
|
427
405
|
const_iterator& operator++();
|
|
428
406
|
const_iterator& operator++(int);
|
|
429
407
|
bool operator==(const const_iterator& other) const;
|
|
430
408
|
bool operator!=(const const_iterator& other) const;
|
|
431
|
-
|
|
409
|
+
const value_type operator*() const;
|
|
410
|
+
const return_value_holder<value_type> operator->() const;
|
|
432
411
|
private:
|
|
433
412
|
using LevelsIterator = typename std::vector<Compactor, AllocCompactor>::const_iterator;
|
|
434
413
|
LevelsIterator levels_it_;
|
|
435
414
|
LevelsIterator levels_end_;
|
|
436
415
|
const T* compactor_it_;
|
|
437
|
-
friend class req_sketch<T, C,
|
|
416
|
+
friend class req_sketch<T, C, A>;
|
|
438
417
|
const_iterator(LevelsIterator begin, LevelsIterator end);
|
|
439
418
|
};
|
|
440
419
|
|