datasketches 0.2.7 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +63 -68
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +15 -6
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -20,18 +20,17 @@
|
|
20
20
|
#ifndef REQ_SKETCH_HPP_
|
21
21
|
#define REQ_SKETCH_HPP_
|
22
22
|
|
23
|
+
#include <iterator>
|
24
|
+
|
23
25
|
#include "req_common.hpp"
|
24
26
|
#include "req_compactor.hpp"
|
25
|
-
#include "
|
26
|
-
|
27
|
-
#include <stdexcept>
|
27
|
+
#include "quantiles_sorted_view.hpp"
|
28
28
|
|
29
29
|
namespace datasketches {
|
30
30
|
|
31
31
|
template<
|
32
32
|
typename T,
|
33
33
|
typename Comparator = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
|
34
|
-
typename S = serde<T>, // deprecated, to be removed in the next major version
|
35
34
|
typename Allocator = std::allocator<T>
|
36
35
|
>
|
37
36
|
class req_sketch {
|
@@ -40,7 +39,6 @@ public:
|
|
40
39
|
using comparator = Comparator;
|
41
40
|
using Compactor = req_compactor<T, Comparator, Allocator>;
|
42
41
|
using AllocCompactor = typename std::allocator_traits<Allocator>::template rebind_alloc<Compactor>;
|
43
|
-
using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
|
44
42
|
|
45
43
|
/**
|
46
44
|
* Constructor
|
@@ -48,9 +46,11 @@ public:
|
|
48
46
|
* Value of 12 roughly corresponds to 1% relative error guarantee at 95% confidence.
|
49
47
|
* @param hra if true, the default, the high ranks are prioritized for better
|
50
48
|
* accuracy. Otherwise the low ranks are prioritized for better accuracy.
|
49
|
+
* @param comparator to use by this instance
|
51
50
|
* @param allocator to use by this instance
|
52
51
|
*/
|
53
|
-
explicit req_sketch(uint16_t k, bool hra = true, const
|
52
|
+
explicit req_sketch(uint16_t k, bool hra = true, const Comparator& comparator = Comparator(),
|
53
|
+
const Allocator& allocator = Allocator());
|
54
54
|
|
55
55
|
~req_sketch();
|
56
56
|
req_sketch(const req_sketch& other);
|
@@ -61,10 +61,12 @@ public:
|
|
61
61
|
/*
|
62
62
|
* Type converting constructor.
|
63
63
|
* @param other sketch of a different type
|
64
|
+
* @param comparator instance of a Comparator
|
64
65
|
* @param allocator instance of an Allocator
|
65
66
|
*/
|
66
|
-
template<typename TT, typename CC, typename
|
67
|
-
explicit req_sketch(const req_sketch<TT, CC,
|
67
|
+
template<typename TT, typename CC, typename AA>
|
68
|
+
explicit req_sketch(const req_sketch<TT, CC, AA>& other, const Comparator& comparator = Comparator(),
|
69
|
+
const Allocator& allocator = Allocator());
|
68
70
|
|
69
71
|
/**
|
70
72
|
* Returns configured parameter K
|
@@ -102,27 +104,33 @@ public:
|
|
102
104
|
*/
|
103
105
|
bool is_estimation_mode() const;
|
104
106
|
|
107
|
+
/**
|
108
|
+
* Updates this sketch with the given data item.
|
109
|
+
* @param item from a stream of items
|
110
|
+
*/
|
105
111
|
template<typename FwdT>
|
106
112
|
void update(FwdT&& item);
|
107
113
|
|
114
|
+
/**
|
115
|
+
* Merges another sketch into this one.
|
116
|
+
* @param other sketch to merge into this one
|
117
|
+
*/
|
108
118
|
template<typename FwdSk>
|
109
119
|
void merge(FwdSk&& other);
|
110
120
|
|
111
121
|
/**
|
112
|
-
* Returns the min
|
113
|
-
*
|
114
|
-
*
|
115
|
-
* @return the min value of the stream
|
122
|
+
* Returns the min item of the stream.
|
123
|
+
* If the sketch is empty this throws std::runtime_error.
|
124
|
+
* @return the min item of the stream
|
116
125
|
*/
|
117
|
-
const T&
|
126
|
+
const T& get_min_item() const;
|
118
127
|
|
119
128
|
/**
|
120
|
-
* Returns the max
|
121
|
-
*
|
122
|
-
*
|
123
|
-
* @return the max value of the stream
|
129
|
+
* Returns the max item of the stream.
|
130
|
+
* If the sketch is empty this throws std::runtime_error.
|
131
|
+
* @return the max item of the stream
|
124
132
|
*/
|
125
|
-
const T&
|
133
|
+
const T& get_max_item() const;
|
126
134
|
|
127
135
|
/**
|
128
136
|
* Returns an instance of the comparator for this sketch.
|
@@ -131,84 +139,99 @@ public:
|
|
131
139
|
Comparator get_comparator() const;
|
132
140
|
|
133
141
|
/**
|
134
|
-
* Returns an
|
135
|
-
*
|
136
|
-
|
142
|
+
* Returns an instance of the allocator for this sketch.
|
143
|
+
* @return allocator
|
144
|
+
*/
|
145
|
+
Allocator get_allocator() const;
|
146
|
+
|
147
|
+
/**
|
148
|
+
* Returns an approximation to the normalized rank of the given item from 0 to 1 inclusive.
|
149
|
+
*
|
150
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
137
151
|
*
|
138
|
-
*
|
152
|
+
* @param item to be ranked.
|
153
|
+
* @param inclusive if true the weight of the given item is included into the rank.
|
154
|
+
* Otherwise the rank equals the sum of the weights of all items that are less than the given item
|
155
|
+
* according to the comparator C.
|
139
156
|
*
|
140
|
-
* @param item to be ranked
|
141
157
|
* @return an approximate rank of the given item
|
142
158
|
*/
|
143
|
-
|
144
|
-
double get_rank(const T& item) const;
|
159
|
+
double get_rank(const T& item, bool inclusive = true) const;
|
145
160
|
|
146
161
|
/**
|
147
162
|
* Returns an approximation to the Probability Mass Function (PMF) of the input stream
|
148
|
-
* given a set of split points (
|
163
|
+
* given a set of split points (items).
|
149
164
|
*
|
150
|
-
* <p>If the sketch is empty this
|
165
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
151
166
|
*
|
152
|
-
* @param split_points an array of <i>m</i> unique, monotonically increasing
|
153
|
-
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
|
154
|
-
*
|
155
|
-
*
|
156
|
-
*
|
157
|
-
*
|
158
|
-
*
|
167
|
+
* @param split_points an array of <i>m</i> unique, monotonically increasing items
|
168
|
+
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
|
169
|
+
*
|
170
|
+
* @param size the number of split points in the array
|
171
|
+
*
|
172
|
+
* @param inclusive if true the rank of an item includes its own weight, and therefore
|
173
|
+
* if the sketch contains items equal to a split point, then in PMF such items are
|
174
|
+
* included into the interval to the left of split point. Otherwise they are included into the interval
|
175
|
+
* to the right of split point.
|
159
176
|
*
|
160
177
|
* @return an array of m+1 doubles each of which is an approximation
|
161
|
-
* to the fraction of the input stream
|
162
|
-
* If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
|
163
|
-
* split point, with the exception that the last interval will include the maximum value.
|
164
|
-
* If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
|
165
|
-
* split point.
|
178
|
+
* to the fraction of the input stream items (the mass) that fall into one of those intervals.
|
166
179
|
*/
|
167
|
-
|
168
|
-
vector_double get_PMF(const T* split_points, uint32_t size) const;
|
180
|
+
using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
|
181
|
+
vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
|
169
182
|
|
170
183
|
/**
|
171
184
|
* Returns an approximation to the Cumulative Distribution Function (CDF), which is the
|
172
|
-
* cumulative analog of the PMF, of the input stream given a set of split points (
|
185
|
+
* cumulative analog of the PMF, of the input stream given a set of split points (items).
|
173
186
|
*
|
174
|
-
* <p>If the sketch is empty this
|
187
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
175
188
|
*
|
176
|
-
* @param split_points an array of <i>m</i> unique, monotonically increasing
|
189
|
+
* @param split_points an array of <i>m</i> unique, monotonically increasing items
|
177
190
|
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
|
178
|
-
* If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
|
179
|
-
* split point, with the exception that the last interval will include the maximum value.
|
180
|
-
* If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
|
181
|
-
* split point.
|
182
|
-
* It is not necessary to include either the min or max values in these split points.
|
183
191
|
*
|
184
|
-
* @
|
192
|
+
* @param size the number of split points in the array
|
193
|
+
*
|
194
|
+
* @param inclusive if true the rank of an item includes its own weight, and therefore
|
195
|
+
* if the sketch contains items equal to a split point, then in CDF such items are
|
196
|
+
* included into the interval to the left of split point. Otherwise they are included into
|
197
|
+
* the interval to the right of split point.
|
198
|
+
*
|
199
|
+
* @return an array of m+1 doubles, which are a consecutive approximation to the CDF
|
185
200
|
* of the input stream given the split_points. The value at array position j of the returned
|
186
201
|
* CDF array is the sum of the returned values in positions 0 through j of the returned PMF
|
187
|
-
* array.
|
202
|
+
* array. This can be viewed as an array of ranks of the given split points plus one more value
|
203
|
+
* that is always 1.
|
188
204
|
*/
|
189
|
-
|
190
|
-
vector_double get_CDF(const T* split_points, uint32_t size) const;
|
205
|
+
vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
|
191
206
|
|
192
207
|
/**
|
193
208
|
* Returns an approximate quantile of the given normalized rank.
|
194
209
|
* The normalized rank must be in the range [0.0, 1.0] (both inclusive).
|
195
|
-
*
|
196
|
-
*
|
210
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
211
|
+
*
|
212
|
+
* @param rank of an item in the hypothetical sorted stream.
|
213
|
+
* @param inclusive if true, the given rank is considered inclusive (includes weight of an item)
|
214
|
+
*
|
215
|
+
* @return approximate quantile associated with the given rank
|
197
216
|
*/
|
198
|
-
using quantile_return_type = typename
|
199
|
-
|
200
|
-
quantile_return_type get_quantile(double rank) const;
|
217
|
+
using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
|
218
|
+
quantile_return_type get_quantile(double rank, bool inclusive = true) const;
|
201
219
|
|
202
220
|
/**
|
203
221
|
* Returns an array of quantiles that correspond to the given array of normalized ranks.
|
222
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
223
|
+
*
|
204
224
|
* @param ranks given array of normalized ranks.
|
225
|
+
* @param size the number of ranks in the array.
|
226
|
+
*
|
205
227
|
* @return array of quantiles that correspond to the given array of normalized ranks
|
228
|
+
*
|
229
|
+
* Deprecated. Will be removed in the next major version. Use get_quantile() instead.
|
206
230
|
*/
|
207
|
-
|
208
|
-
std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size) const;
|
231
|
+
std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
|
209
232
|
|
210
233
|
/**
|
211
|
-
* Returns an approximate lower bound of the given
|
234
|
+
* Returns an approximate lower bound of the given normalized rank.
|
212
235
|
* @param rank the given rank, a value between 0 and 1.0.
|
213
236
|
* @param num_std_dev the number of standard deviations. Must be 1, 2, or 3.
|
214
237
|
* @return an approximate lower bound rank.
|
@@ -216,7 +239,7 @@ public:
|
|
216
239
|
double get_rank_lower_bound(double rank, uint8_t num_std_dev) const;
|
217
240
|
|
218
241
|
/**
|
219
|
-
* Returns an approximate upper bound of the given
|
242
|
+
* Returns an approximate upper bound of the given normalized rank.
|
220
243
|
* @param rank the given rank, a value between 0 and 1.0.
|
221
244
|
* @param num_std_dev the number of standard deviations. Must be 1, 2, or 3.
|
222
245
|
* @return an approximate upper bound rank.
|
@@ -239,27 +262,27 @@ public:
|
|
239
262
|
/**
|
240
263
|
* Computes size needed to serialize the current state of the sketch.
|
241
264
|
* This version is for fixed-size arithmetic types (integral and floating point).
|
242
|
-
* @param instance of a SerDe
|
265
|
+
* @param sd instance of a SerDe
|
243
266
|
* @return size in bytes needed to serialize this sketch
|
244
267
|
*/
|
245
|
-
template<typename TT = T, typename SerDe =
|
268
|
+
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
246
269
|
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
247
270
|
|
248
271
|
/**
|
249
272
|
* Computes size needed to serialize the current state of the sketch.
|
250
273
|
* This version is for all other types and can be expensive since every item needs to be looked at.
|
251
|
-
* @param instance of a SerDe
|
274
|
+
* @param sd instance of a SerDe
|
252
275
|
* @return size in bytes needed to serialize this sketch
|
253
276
|
*/
|
254
|
-
template<typename TT = T, typename SerDe =
|
277
|
+
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
255
278
|
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
256
279
|
|
257
280
|
/**
|
258
281
|
* This method serializes the sketch into a given stream in a binary form
|
259
282
|
* @param os output stream
|
260
|
-
* @param instance of a SerDe
|
283
|
+
* @param sd instance of a SerDe
|
261
284
|
*/
|
262
|
-
template<typename SerDe =
|
285
|
+
template<typename SerDe = serde<T>>
|
263
286
|
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
264
287
|
|
265
288
|
// This is a convenience alias for users
|
@@ -272,52 +295,35 @@ public:
|
|
272
295
|
* It is a blank space of a given size.
|
273
296
|
* This header is used in Datasketches PostgreSQL extension.
|
274
297
|
* @param header_size_bytes space to reserve in front of the sketch
|
275
|
-
* @param instance of a SerDe
|
298
|
+
* @param sd instance of a SerDe
|
276
299
|
*/
|
277
|
-
template<typename SerDe =
|
300
|
+
template<typename SerDe = serde<T>>
|
278
301
|
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
279
302
|
|
280
303
|
/**
|
281
304
|
* This method deserializes a sketch from a given stream.
|
282
305
|
* @param is input stream
|
283
|
-
* @param instance of
|
284
|
-
* @
|
285
|
-
*
|
286
|
-
* Deprecated, to be removed in the next major version
|
287
|
-
*/
|
288
|
-
static req_sketch deserialize(std::istream& is, const Allocator& allocator = Allocator());
|
289
|
-
|
290
|
-
/**
|
291
|
-
* This method deserializes a sketch from a given stream.
|
292
|
-
* @param is input stream
|
293
|
-
* @param instance of a SerDe
|
294
|
-
* @param instance of an Allocator
|
295
|
-
* @return an instance of a sketch
|
296
|
-
*/
|
297
|
-
template<typename SerDe = S>
|
298
|
-
static req_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
|
299
|
-
|
300
|
-
/**
|
301
|
-
* This method deserializes a sketch from a given array of bytes.
|
302
|
-
* @param bytes pointer to the array of bytes
|
303
|
-
* @param size the size of the array
|
304
|
-
* @param instance of an Allocator
|
306
|
+
* @param sd instance of a SerDe
|
307
|
+
* @param comparator instance of a Comparator
|
308
|
+
* @param allocator instance of an Allocator
|
305
309
|
* @return an instance of a sketch
|
306
|
-
*
|
307
|
-
* Deprecated, to be removed in the next major version
|
308
310
|
*/
|
309
|
-
|
311
|
+
template<typename SerDe = serde<T>>
|
312
|
+
static req_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(),
|
313
|
+
const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
|
310
314
|
|
311
315
|
/**
|
312
316
|
* This method deserializes a sketch from a given array of bytes.
|
313
317
|
* @param bytes pointer to the array of bytes
|
314
318
|
* @param size the size of the array
|
315
|
-
* @param instance of a SerDe
|
316
|
-
* @param instance of
|
319
|
+
* @param sd instance of a SerDe
|
320
|
+
* @param comparator instance of a Comparator
|
321
|
+
* @param allocator instance of an Allocator
|
317
322
|
* @return an instance of a sketch
|
318
323
|
*/
|
319
|
-
template<typename SerDe =
|
320
|
-
static req_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(),
|
324
|
+
template<typename SerDe = serde<T>>
|
325
|
+
static req_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(),
|
326
|
+
const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
|
321
327
|
|
322
328
|
/**
|
323
329
|
* Prints a summary of the sketch.
|
@@ -330,10 +336,10 @@ public:
|
|
330
336
|
const_iterator begin() const;
|
331
337
|
const_iterator end() const;
|
332
338
|
|
333
|
-
|
334
|
-
quantile_sketch_sorted_view<T, Comparator, Allocator> get_sorted_view(bool cumulative) const;
|
339
|
+
quantiles_sorted_view<T, Comparator, Allocator> get_sorted_view() const;
|
335
340
|
|
336
341
|
private:
|
342
|
+
Comparator comparator_;
|
337
343
|
Allocator allocator_;
|
338
344
|
uint16_t k_;
|
339
345
|
bool hra_;
|
@@ -341,8 +347,12 @@ private:
|
|
341
347
|
uint32_t num_retained_;
|
342
348
|
uint64_t n_;
|
343
349
|
std::vector<Compactor, AllocCompactor> compactors_;
|
344
|
-
T*
|
345
|
-
T*
|
350
|
+
T* min_item_;
|
351
|
+
T* max_item_;
|
352
|
+
mutable quantiles_sorted_view<T, Comparator, Allocator>* sorted_view_;
|
353
|
+
|
354
|
+
void setup_sorted_view() const; // modifies mutable state
|
355
|
+
void reset_sorted_view();
|
346
356
|
|
347
357
|
static const bool LAZY_COMPRESSION = false;
|
348
358
|
|
@@ -366,75 +376,44 @@ private:
|
|
366
376
|
|
367
377
|
// for deserialization
|
368
378
|
class item_deleter;
|
369
|
-
req_sketch(uint16_t k, bool hra, uint64_t n,
|
379
|
+
req_sketch(uint16_t k, bool hra, uint64_t n,
|
380
|
+
std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
|
381
|
+
std::vector<Compactor, AllocCompactor>&& compactors, const Comparator& comparator);
|
370
382
|
|
371
383
|
static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);
|
372
384
|
static void check_serial_version(uint8_t serial_version);
|
373
385
|
static void check_family_id(uint8_t family_id);
|
374
386
|
|
375
|
-
// implementations for floating point types
|
376
387
|
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
377
|
-
static const TT&
|
378
|
-
|
379
|
-
return value;
|
388
|
+
static inline bool check_update_item(const TT& item) {
|
389
|
+
return !std::isnan(item);
|
380
390
|
}
|
381
391
|
|
382
|
-
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
383
|
-
static inline bool check_update_value(const TT& value) {
|
384
|
-
return !std::isnan(value);
|
385
|
-
}
|
386
|
-
|
387
|
-
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
388
|
-
static inline void check_split_points(const T* values, uint32_t size) {
|
389
|
-
for (uint32_t i = 0; i < size ; i++) {
|
390
|
-
if (std::isnan(values[i])) {
|
391
|
-
throw std::invalid_argument("Values must not be NaN");
|
392
|
-
}
|
393
|
-
if ((i < (size - 1)) && !(Comparator()(values[i], values[i + 1]))) {
|
394
|
-
throw std::invalid_argument("Values must be unique and monotonically increasing");
|
395
|
-
}
|
396
|
-
}
|
397
|
-
}
|
398
|
-
|
399
|
-
// implementations for all other types
|
400
392
|
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|
401
|
-
static const TT&
|
402
|
-
throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of values");
|
403
|
-
}
|
404
|
-
|
405
|
-
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|
406
|
-
static inline bool check_update_value(const TT&) {
|
393
|
+
static inline bool check_update_item(const TT&) {
|
407
394
|
return true;
|
408
395
|
}
|
409
396
|
|
410
|
-
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|
411
|
-
static inline void check_split_points(const T* values, uint32_t size) {
|
412
|
-
for (uint32_t i = 0; i < size ; i++) {
|
413
|
-
if ((i < (size - 1)) && !(Comparator()(values[i], values[i + 1]))) {
|
414
|
-
throw std::invalid_argument("Values must be unique and monotonically increasing");
|
415
|
-
}
|
416
|
-
}
|
417
|
-
}
|
418
|
-
|
419
397
|
// for type converting constructor
|
420
|
-
template<typename TT, typename CC, typename
|
421
|
-
friend class req_sketch;
|
398
|
+
template<typename TT, typename CC, typename AA> friend class req_sketch;
|
422
399
|
};
|
423
400
|
|
424
|
-
template<typename T, typename C, typename
|
425
|
-
class req_sketch<T, C,
|
401
|
+
template<typename T, typename C, typename A>
|
402
|
+
class req_sketch<T, C, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
|
426
403
|
public:
|
404
|
+
using value_type = std::pair<const T&, const uint64_t>;
|
427
405
|
const_iterator& operator++();
|
428
406
|
const_iterator& operator++(int);
|
429
407
|
bool operator==(const const_iterator& other) const;
|
430
408
|
bool operator!=(const const_iterator& other) const;
|
431
|
-
|
409
|
+
const value_type operator*() const;
|
410
|
+
const return_value_holder<value_type> operator->() const;
|
432
411
|
private:
|
433
412
|
using LevelsIterator = typename std::vector<Compactor, AllocCompactor>::const_iterator;
|
434
413
|
LevelsIterator levels_it_;
|
435
414
|
LevelsIterator levels_end_;
|
436
415
|
const T* compactor_it_;
|
437
|
-
friend class req_sketch<T, C,
|
416
|
+
friend class req_sketch<T, C, A>;
|
438
417
|
const_iterator(LevelsIterator begin, LevelsIterator end);
|
439
418
|
};
|
440
419
|
|