datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -20,18 +20,17 @@
20
20
  #ifndef REQ_SKETCH_HPP_
21
21
  #define REQ_SKETCH_HPP_
22
22
 
23
+ #include <iterator>
24
+
23
25
  #include "req_common.hpp"
24
26
  #include "req_compactor.hpp"
25
- #include "quantile_sketch_sorted_view.hpp"
26
-
27
- #include <stdexcept>
27
+ #include "quantiles_sorted_view.hpp"
28
28
 
29
29
  namespace datasketches {
30
30
 
31
31
  template<
32
32
  typename T,
33
33
  typename Comparator = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
34
- typename S = serde<T>, // deprecated, to be removed in the next major version
35
34
  typename Allocator = std::allocator<T>
36
35
  >
37
36
  class req_sketch {
@@ -40,7 +39,6 @@ public:
40
39
  using comparator = Comparator;
41
40
  using Compactor = req_compactor<T, Comparator, Allocator>;
42
41
  using AllocCompactor = typename std::allocator_traits<Allocator>::template rebind_alloc<Compactor>;
43
- using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
44
42
 
45
43
  /**
46
44
  * Constructor
@@ -48,9 +46,11 @@ public:
48
46
  * Value of 12 roughly corresponds to 1% relative error guarantee at 95% confidence.
49
47
  * @param hra if true, the default, the high ranks are prioritized for better
50
48
  * accuracy. Otherwise the low ranks are prioritized for better accuracy.
49
+ * @param comparator to use by this instance
51
50
  * @param allocator to use by this instance
52
51
  */
53
- explicit req_sketch(uint16_t k, bool hra = true, const Allocator& allocator = Allocator());
52
+ explicit req_sketch(uint16_t k, bool hra = true, const Comparator& comparator = Comparator(),
53
+ const Allocator& allocator = Allocator());
54
54
 
55
55
  ~req_sketch();
56
56
  req_sketch(const req_sketch& other);
@@ -61,10 +61,12 @@ public:
61
61
  /*
62
62
  * Type converting constructor.
63
63
  * @param other sketch of a different type
64
+ * @param comparator instance of a Comparator
64
65
  * @param allocator instance of an Allocator
65
66
  */
66
- template<typename TT, typename CC, typename SS, typename AA>
67
- explicit req_sketch(const req_sketch<TT, CC, SS, AA>& other, const Allocator& allocator = Allocator());
67
+ template<typename TT, typename CC, typename AA>
68
+ explicit req_sketch(const req_sketch<TT, CC, AA>& other, const Comparator& comparator = Comparator(),
69
+ const Allocator& allocator = Allocator());
68
70
 
69
71
  /**
70
72
  * Returns configured parameter K
@@ -102,27 +104,33 @@ public:
102
104
  */
103
105
  bool is_estimation_mode() const;
104
106
 
107
+ /**
108
+ * Updates this sketch with the given data item.
109
+ * @param item from a stream of items
110
+ */
105
111
  template<typename FwdT>
106
112
  void update(FwdT&& item);
107
113
 
114
+ /**
115
+ * Merges another sketch into this one.
116
+ * @param other sketch to merge into this one
117
+ */
108
118
  template<typename FwdSk>
109
119
  void merge(FwdSk&& other);
110
120
 
111
121
  /**
112
- * Returns the min value of the stream.
113
- * For floating point types: if the sketch is empty this returns NaN.
114
- * For other types: if the sketch is empty this throws runtime_error.
115
- * @return the min value of the stream
122
+ * Returns the min item of the stream.
123
+ * If the sketch is empty this throws std::runtime_error.
124
+ * @return the min item of the stream
116
125
  */
117
- const T& get_min_value() const;
126
+ const T& get_min_item() const;
118
127
 
119
128
  /**
120
- * Returns the max value of the stream.
121
- * For floating point types: if the sketch is empty this returns NaN.
122
- * For other types: if the sketch is empty this throws runtime_error.
123
- * @return the max value of the stream
129
+ * Returns the max item of the stream.
130
+ * If the sketch is empty this throws std::runtime_error.
131
+ * @return the max item of the stream
124
132
  */
125
- const T& get_max_value() const;
133
+ const T& get_max_item() const;
126
134
 
127
135
  /**
128
136
  * Returns an instance of the comparator for this sketch.
@@ -131,84 +139,99 @@ public:
131
139
  Comparator get_comparator() const;
132
140
 
133
141
  /**
134
- * Returns an approximation to the normalized (fractional) rank of the given item from 0 to 1 inclusive.
135
- * With the template parameter inclusive=true the weight of the given item is included into the rank.
136
- * Otherwise the rank equals the sum of the weights of items less than the given item according to the Comparator.
142
+ * Returns an instance of the allocator for this sketch.
143
+ * @return allocator
144
+ */
145
+ Allocator get_allocator() const;
146
+
147
+ /**
148
+ * Returns an approximation to the normalized rank of the given item from 0 to 1 inclusive.
149
+ *
150
+ * <p>If the sketch is empty this throws std::runtime_error.
137
151
  *
138
- * <p>If the sketch is empty this returns NaN.
152
+ * @param item to be ranked.
153
+ * @param inclusive if true the weight of the given item is included into the rank.
154
+ * Otherwise the rank equals the sum of the weights of all items that are less than the given item
155
+ * according to the comparator C.
139
156
  *
140
- * @param item to be ranked
141
157
  * @return an approximate rank of the given item
142
158
  */
143
- template<bool inclusive = false>
144
- double get_rank(const T& item) const;
159
+ double get_rank(const T& item, bool inclusive = true) const;
145
160
 
146
161
  /**
147
162
  * Returns an approximation to the Probability Mass Function (PMF) of the input stream
148
- * given a set of split points (values).
163
+ * given a set of split points (items).
149
164
  *
150
- * <p>If the sketch is empty this returns an empty vector.
165
+ * <p>If the sketch is empty this throws std::runtime_error.
151
166
  *
152
- * @param split_points an array of <i>m</i> unique, monotonically increasing values
153
- * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
154
- * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
155
- * split point, with the exception that the last interval will include the maximum value.
156
- * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
157
- * split point.
158
- * It is not necessary to include either the min or max values in these split points.
167
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
168
+ * that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
169
+ *
170
+ * @param size the number of split points in the array
171
+ *
172
+ * @param inclusive if true the rank of an item includes its own weight, and therefore
173
+ * if the sketch contains items equal to a split point, then in PMF such items are
174
+ * included into the interval to the left of split point. Otherwise they are included into the interval
175
+ * to the right of split point.
159
176
  *
160
177
  * @return an array of m+1 doubles each of which is an approximation
161
- * to the fraction of the input stream values (the mass) that fall into one of those intervals.
162
- * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
163
- * split point, with the exception that the last interval will include the maximum value.
164
- * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
165
- * split point.
178
+ * to the fraction of the input stream items (the mass) that fall into one of those intervals.
166
179
  */
167
- template<bool inclusive = false>
168
- vector_double get_PMF(const T* split_points, uint32_t size) const;
180
+ using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
181
+ vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
169
182
 
170
183
  /**
171
184
  * Returns an approximation to the Cumulative Distribution Function (CDF), which is the
172
- * cumulative analog of the PMF, of the input stream given a set of split points (values).
185
+ * cumulative analog of the PMF, of the input stream given a set of split points (items).
173
186
  *
174
- * <p>If the sketch is empty this returns an empty vector.
187
+ * <p>If the sketch is empty this throws std::runtime_error.
175
188
  *
176
- * @param split_points an array of <i>m</i> unique, monotonically increasing float values
189
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
177
190
  * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
178
- * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
179
- * split point, with the exception that the last interval will include the maximum value.
180
- * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
181
- * split point.
182
- * It is not necessary to include either the min or max values in these split points.
183
191
  *
184
- * @return an array of m+1 double values, which are a consecutive approximation to the CDF
192
+ * @param size the number of split points in the array
193
+ *
194
+ * @param inclusive if true the rank of an item includes its own weight, and therefore
195
+ * if the sketch contains items equal to a split point, then in CDF such items are
196
+ * included into the interval to the left of split point. Otherwise they are included into
197
+ * the interval to the right of split point.
198
+ *
199
+ * @return an array of m+1 doubles, which are a consecutive approximation to the CDF
185
200
  * of the input stream given the split_points. The value at array position j of the returned
186
201
  * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
187
- * array.
202
+ * array. This can be viewed as an array of ranks of the given split points plus one more value
203
+ * that is always 1.
188
204
  */
189
- template<bool inclusive = false>
190
- vector_double get_CDF(const T* split_points, uint32_t size) const;
205
+ vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
191
206
 
192
207
  /**
193
208
  * Returns an approximate quantile of the given normalized rank.
194
209
  * The normalized rank must be in the range [0.0, 1.0] (both inclusive).
195
- * @param rank the given normalized rank
196
- * @return approximate quantile given the normalized rank
210
+ * <p>If the sketch is empty this throws std::runtime_error.
211
+ *
212
+ * @param rank of an item in the hypothetical sorted stream.
213
+ * @param inclusive if true, the given rank is considered inclusive (includes weight of an item)
214
+ *
215
+ * @return approximate quantile associated with the given rank
197
216
  */
198
- using quantile_return_type = typename quantile_sketch_sorted_view<T, Comparator, Allocator>::quantile_return_type;
199
- template<bool inclusive = false>
200
- quantile_return_type get_quantile(double rank) const;
217
+ using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
218
+ quantile_return_type get_quantile(double rank, bool inclusive = true) const;
201
219
 
202
220
  /**
203
221
  * Returns an array of quantiles that correspond to the given array of normalized ranks.
222
+ * <p>If the sketch is empty this throws std::runtime_error.
223
+ *
204
224
  * @param ranks given array of normalized ranks.
225
+ * @param size the number of ranks in the array.
226
+ *
205
227
  * @return array of quantiles that correspond to the given array of normalized ranks
228
+ *
229
+ * Deprecated. Will be removed in the next major version. Use get_quantile() instead.
206
230
  */
207
- template<bool inclusive = false>
208
- std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size) const;
231
+ std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
209
232
 
210
233
  /**
211
- * Returns an approximate lower bound of the given noramalized rank.
234
+ * Returns an approximate lower bound of the given normalized rank.
212
235
  * @param rank the given rank, a value between 0 and 1.0.
213
236
  * @param num_std_dev the number of standard deviations. Must be 1, 2, or 3.
214
237
  * @return an approximate lower bound rank.
@@ -216,7 +239,7 @@ public:
216
239
  double get_rank_lower_bound(double rank, uint8_t num_std_dev) const;
217
240
 
218
241
  /**
219
- * Returns an approximate upper bound of the given noramalized rank.
242
+ * Returns an approximate upper bound of the given normalized rank.
220
243
  * @param rank the given rank, a value between 0 and 1.0.
221
244
  * @param num_std_dev the number of standard deviations. Must be 1, 2, or 3.
222
245
  * @return an approximate upper bound rank.
@@ -239,27 +262,27 @@ public:
239
262
  /**
240
263
  * Computes size needed to serialize the current state of the sketch.
241
264
  * This version is for fixed-size arithmetic types (integral and floating point).
242
- * @param instance of a SerDe
265
+ * @param sd instance of a SerDe
243
266
  * @return size in bytes needed to serialize this sketch
244
267
  */
245
- template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
268
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
246
269
  size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
247
270
 
248
271
  /**
249
272
  * Computes size needed to serialize the current state of the sketch.
250
273
  * This version is for all other types and can be expensive since every item needs to be looked at.
251
- * @param instance of a SerDe
274
+ * @param sd instance of a SerDe
252
275
  * @return size in bytes needed to serialize this sketch
253
276
  */
254
- template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
277
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
255
278
  size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
256
279
 
257
280
  /**
258
281
  * This method serializes the sketch into a given stream in a binary form
259
282
  * @param os output stream
260
- * @param instance of a SerDe
283
+ * @param sd instance of a SerDe
261
284
  */
262
- template<typename SerDe = S>
285
+ template<typename SerDe = serde<T>>
263
286
  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
264
287
 
265
288
  // This is a convenience alias for users
@@ -272,52 +295,35 @@ public:
272
295
  * It is a blank space of a given size.
273
296
  * This header is used in Datasketches PostgreSQL extension.
274
297
  * @param header_size_bytes space to reserve in front of the sketch
275
- * @param instance of a SerDe
298
+ * @param sd instance of a SerDe
276
299
  */
277
- template<typename SerDe = S>
300
+ template<typename SerDe = serde<T>>
278
301
  vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
279
302
 
280
303
  /**
281
304
  * This method deserializes a sketch from a given stream.
282
305
  * @param is input stream
283
- * @param instance of an Allocator
284
- * @return an instance of a sketch
285
- *
286
- * Deprecated, to be removed in the next major version
287
- */
288
- static req_sketch deserialize(std::istream& is, const Allocator& allocator = Allocator());
289
-
290
- /**
291
- * This method deserializes a sketch from a given stream.
292
- * @param is input stream
293
- * @param instance of a SerDe
294
- * @param instance of an Allocator
295
- * @return an instance of a sketch
296
- */
297
- template<typename SerDe = S>
298
- static req_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
299
-
300
- /**
301
- * This method deserializes a sketch from a given array of bytes.
302
- * @param bytes pointer to the array of bytes
303
- * @param size the size of the array
304
- * @param instance of an Allocator
306
+ * @param sd instance of a SerDe
307
+ * @param comparator instance of a Comparator
308
+ * @param allocator instance of an Allocator
305
309
  * @return an instance of a sketch
306
- *
307
- * Deprecated, to be removed in the next major version
308
310
  */
309
- static req_sketch deserialize(const void* bytes, size_t size, const Allocator& allocator = Allocator());
311
+ template<typename SerDe = serde<T>>
312
+ static req_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(),
313
+ const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
310
314
 
311
315
  /**
312
316
  * This method deserializes a sketch from a given array of bytes.
313
317
  * @param bytes pointer to the array of bytes
314
318
  * @param size the size of the array
315
- * @param instance of a SerDe
316
- * @param instance of an Allocator
319
+ * @param sd instance of a SerDe
320
+ * @param comparator instance of a Comparator
321
+ * @param allocator instance of an Allocator
317
322
  * @return an instance of a sketch
318
323
  */
319
- template<typename SerDe = S>
320
- static req_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
324
+ template<typename SerDe = serde<T>>
325
+ static req_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(),
326
+ const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
321
327
 
322
328
  /**
323
329
  * Prints a summary of the sketch.
@@ -330,10 +336,10 @@ public:
330
336
  const_iterator begin() const;
331
337
  const_iterator end() const;
332
338
 
333
- template<bool inclusive = false>
334
- quantile_sketch_sorted_view<T, Comparator, Allocator> get_sorted_view(bool cumulative) const;
339
+ quantiles_sorted_view<T, Comparator, Allocator> get_sorted_view() const;
335
340
 
336
341
  private:
342
+ Comparator comparator_;
337
343
  Allocator allocator_;
338
344
  uint16_t k_;
339
345
  bool hra_;
@@ -341,8 +347,12 @@ private:
341
347
  uint32_t num_retained_;
342
348
  uint64_t n_;
343
349
  std::vector<Compactor, AllocCompactor> compactors_;
344
- T* min_value_;
345
- T* max_value_;
350
+ T* min_item_;
351
+ T* max_item_;
352
+ mutable quantiles_sorted_view<T, Comparator, Allocator>* sorted_view_;
353
+
354
+ void setup_sorted_view() const; // modifies mutable state
355
+ void reset_sorted_view();
346
356
 
347
357
  static const bool LAZY_COMPRESSION = false;
348
358
 
@@ -366,75 +376,44 @@ private:
366
376
 
367
377
  // for deserialization
368
378
  class item_deleter;
369
- req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);
379
+ req_sketch(uint16_t k, bool hra, uint64_t n,
380
+ std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
381
+ std::vector<Compactor, AllocCompactor>&& compactors, const Comparator& comparator);
370
382
 
371
383
  static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);
372
384
  static void check_serial_version(uint8_t serial_version);
373
385
  static void check_family_id(uint8_t family_id);
374
386
 
375
- // implementations for floating point types
376
387
  template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
377
- static const TT& get_invalid_value() {
378
- static TT value = std::numeric_limits<TT>::quiet_NaN();
379
- return value;
388
+ static inline bool check_update_item(const TT& item) {
389
+ return !std::isnan(item);
380
390
  }
381
391
 
382
- template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
383
- static inline bool check_update_value(const TT& value) {
384
- return !std::isnan(value);
385
- }
386
-
387
- template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
388
- static inline void check_split_points(const T* values, uint32_t size) {
389
- for (uint32_t i = 0; i < size ; i++) {
390
- if (std::isnan(values[i])) {
391
- throw std::invalid_argument("Values must not be NaN");
392
- }
393
- if ((i < (size - 1)) && !(Comparator()(values[i], values[i + 1]))) {
394
- throw std::invalid_argument("Values must be unique and monotonically increasing");
395
- }
396
- }
397
- }
398
-
399
- // implementations for all other types
400
392
  template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
401
- static const TT& get_invalid_value() {
402
- throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of values");
403
- }
404
-
405
- template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
406
- static inline bool check_update_value(const TT&) {
393
+ static inline bool check_update_item(const TT&) {
407
394
  return true;
408
395
  }
409
396
 
410
- template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
411
- static inline void check_split_points(const T* values, uint32_t size) {
412
- for (uint32_t i = 0; i < size ; i++) {
413
- if ((i < (size - 1)) && !(Comparator()(values[i], values[i + 1]))) {
414
- throw std::invalid_argument("Values must be unique and monotonically increasing");
415
- }
416
- }
417
- }
418
-
419
397
  // for type converting constructor
420
- template<typename TT, typename CC, typename SS, typename AA>
421
- friend class req_sketch;
398
+ template<typename TT, typename CC, typename AA> friend class req_sketch;
422
399
  };
423
400
 
424
- template<typename T, typename C, typename S, typename A>
425
- class req_sketch<T, C, S, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
401
+ template<typename T, typename C, typename A>
402
+ class req_sketch<T, C, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
426
403
  public:
404
+ using value_type = std::pair<const T&, const uint64_t>;
427
405
  const_iterator& operator++();
428
406
  const_iterator& operator++(int);
429
407
  bool operator==(const const_iterator& other) const;
430
408
  bool operator!=(const const_iterator& other) const;
431
- std::pair<const T&, const uint64_t> operator*() const;
409
+ const value_type operator*() const;
410
+ const return_value_holder<value_type> operator->() const;
432
411
  private:
433
412
  using LevelsIterator = typename std::vector<Compactor, AllocCompactor>::const_iterator;
434
413
  LevelsIterator levels_it_;
435
414
  LevelsIterator levels_end_;
436
415
  const T* compactor_it_;
437
- friend class req_sketch<T, C, S, A>;
416
+ friend class req_sketch<T, C, A>;
438
417
  const_iterator(LevelsIterator begin, LevelsIterator end);
439
418
  };
440
419