datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -20,18 +20,17 @@
20
20
  #ifndef REQ_SKETCH_HPP_
21
21
  #define REQ_SKETCH_HPP_
22
22
 
23
+ #include <iterator>
24
+
23
25
  #include "req_common.hpp"
24
26
  #include "req_compactor.hpp"
25
- #include "quantile_sketch_sorted_view.hpp"
26
-
27
- #include <stdexcept>
27
+ #include "quantiles_sorted_view.hpp"
28
28
 
29
29
  namespace datasketches {
30
30
 
31
31
  template<
32
32
  typename T,
33
33
  typename Comparator = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
34
- typename S = serde<T>, // deprecated, to be removed in the next major version
35
34
  typename Allocator = std::allocator<T>
36
35
  >
37
36
  class req_sketch {
@@ -40,7 +39,6 @@ public:
40
39
  using comparator = Comparator;
41
40
  using Compactor = req_compactor<T, Comparator, Allocator>;
42
41
  using AllocCompactor = typename std::allocator_traits<Allocator>::template rebind_alloc<Compactor>;
43
- using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
44
42
 
45
43
  /**
46
44
  * Constructor
@@ -48,9 +46,11 @@ public:
48
46
  * Value of 12 roughly corresponds to 1% relative error guarantee at 95% confidence.
49
47
  * @param hra if true, the default, the high ranks are prioritized for better
50
48
  * accuracy. Otherwise the low ranks are prioritized for better accuracy.
49
+ * @param comparator to use by this instance
51
50
  * @param allocator to use by this instance
52
51
  */
53
- explicit req_sketch(uint16_t k, bool hra = true, const Allocator& allocator = Allocator());
52
+ explicit req_sketch(uint16_t k, bool hra = true, const Comparator& comparator = Comparator(),
53
+ const Allocator& allocator = Allocator());
54
54
 
55
55
  ~req_sketch();
56
56
  req_sketch(const req_sketch& other);
@@ -61,10 +61,12 @@ public:
61
61
  /*
62
62
  * Type converting constructor.
63
63
  * @param other sketch of a different type
64
+ * @param comparator instance of a Comparator
64
65
  * @param allocator instance of an Allocator
65
66
  */
66
- template<typename TT, typename CC, typename SS, typename AA>
67
- explicit req_sketch(const req_sketch<TT, CC, SS, AA>& other, const Allocator& allocator = Allocator());
67
+ template<typename TT, typename CC, typename AA>
68
+ explicit req_sketch(const req_sketch<TT, CC, AA>& other, const Comparator& comparator = Comparator(),
69
+ const Allocator& allocator = Allocator());
68
70
 
69
71
  /**
70
72
  * Returns configured parameter K
@@ -102,27 +104,33 @@ public:
102
104
  */
103
105
  bool is_estimation_mode() const;
104
106
 
107
+ /**
108
+ * Updates this sketch with the given data item.
109
+ * @param item from a stream of items
110
+ */
105
111
  template<typename FwdT>
106
112
  void update(FwdT&& item);
107
113
 
114
+ /**
115
+ * Merges another sketch into this one.
116
+ * @param other sketch to merge into this one
117
+ */
108
118
  template<typename FwdSk>
109
119
  void merge(FwdSk&& other);
110
120
 
111
121
  /**
112
- * Returns the min value of the stream.
113
- * For floating point types: if the sketch is empty this returns NaN.
114
- * For other types: if the sketch is empty this throws runtime_error.
115
- * @return the min value of the stream
122
+ * Returns the min item of the stream.
123
+ * If the sketch is empty this throws std::runtime_error.
124
+ * @return the min item of the stream
116
125
  */
117
- const T& get_min_value() const;
126
+ const T& get_min_item() const;
118
127
 
119
128
  /**
120
- * Returns the max value of the stream.
121
- * For floating point types: if the sketch is empty this returns NaN.
122
- * For other types: if the sketch is empty this throws runtime_error.
123
- * @return the max value of the stream
129
+ * Returns the max item of the stream.
130
+ * If the sketch is empty this throws std::runtime_error.
131
+ * @return the max item of the stream
124
132
  */
125
- const T& get_max_value() const;
133
+ const T& get_max_item() const;
126
134
 
127
135
  /**
128
136
  * Returns an instance of the comparator for this sketch.
@@ -131,84 +139,99 @@ public:
131
139
  Comparator get_comparator() const;
132
140
 
133
141
  /**
134
- * Returns an approximation to the normalized (fractional) rank of the given item from 0 to 1 inclusive.
135
- * With the template parameter inclusive=true the weight of the given item is included into the rank.
136
- * Otherwise the rank equals the sum of the weights of items less than the given item according to the Comparator.
142
+ * Returns an instance of the allocator for this sketch.
143
+ * @return allocator
144
+ */
145
+ Allocator get_allocator() const;
146
+
147
+ /**
148
+ * Returns an approximation to the normalized rank of the given item from 0 to 1 inclusive.
149
+ *
150
+ * <p>If the sketch is empty this throws std::runtime_error.
137
151
  *
138
- * <p>If the sketch is empty this returns NaN.
152
+ * @param item to be ranked.
153
+ * @param inclusive if true the weight of the given item is included into the rank.
154
+ * Otherwise the rank equals the sum of the weights of all items that are less than the given item
155
+ * according to the comparator C.
139
156
  *
140
- * @param item to be ranked
141
157
  * @return an approximate rank of the given item
142
158
  */
143
- template<bool inclusive = false>
144
- double get_rank(const T& item) const;
159
+ double get_rank(const T& item, bool inclusive = true) const;
145
160
 
146
161
  /**
147
162
  * Returns an approximation to the Probability Mass Function (PMF) of the input stream
148
- * given a set of split points (values).
163
+ * given a set of split points (items).
149
164
  *
150
- * <p>If the sketch is empty this returns an empty vector.
165
+ * <p>If the sketch is empty this throws std::runtime_error.
151
166
  *
152
- * @param split_points an array of <i>m</i> unique, monotonically increasing values
153
- * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
154
- * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
155
- * split point, with the exception that the last interval will include the maximum value.
156
- * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
157
- * split point.
158
- * It is not necessary to include either the min or max values in these split points.
167
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
168
+ * that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
169
+ *
170
+ * @param size the number of split points in the array
171
+ *
172
+ * @param inclusive if true the rank of an item includes its own weight, and therefore
173
+ * if the sketch contains items equal to a split point, then in PMF such items are
174
+ * included into the interval to the left of split point. Otherwise they are included into the interval
175
+ * to the right of split point.
159
176
  *
160
177
  * @return an array of m+1 doubles each of which is an approximation
161
- * to the fraction of the input stream values (the mass) that fall into one of those intervals.
162
- * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
163
- * split point, with the exception that the last interval will include the maximum value.
164
- * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
165
- * split point.
178
+ * to the fraction of the input stream items (the mass) that fall into one of those intervals.
166
179
  */
167
- template<bool inclusive = false>
168
- vector_double get_PMF(const T* split_points, uint32_t size) const;
180
+ using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
181
+ vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
169
182
 
170
183
  /**
171
184
  * Returns an approximation to the Cumulative Distribution Function (CDF), which is the
172
- * cumulative analog of the PMF, of the input stream given a set of split points (values).
185
+ * cumulative analog of the PMF, of the input stream given a set of split points (items).
173
186
  *
174
- * <p>If the sketch is empty this returns an empty vector.
187
+ * <p>If the sketch is empty this throws std::runtime_error.
175
188
  *
176
- * @param split_points an array of <i>m</i> unique, monotonically increasing float values
189
+ * @param split_points an array of <i>m</i> unique, monotonically increasing items
177
190
  * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
178
- * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
179
- * split point, with the exception that the last interval will include the maximum value.
180
- * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
181
- * split point.
182
- * It is not necessary to include either the min or max values in these split points.
183
191
  *
184
- * @return an array of m+1 double values, which are a consecutive approximation to the CDF
192
+ * @param size the number of split points in the array
193
+ *
194
+ * @param inclusive if true the rank of an item includes its own weight, and therefore
195
+ * if the sketch contains items equal to a split point, then in CDF such items are
196
+ * included into the interval to the left of split point. Otherwise they are included into
197
+ * the interval to the right of split point.
198
+ *
199
+ * @return an array of m+1 doubles, which are a consecutive approximation to the CDF
185
200
  * of the input stream given the split_points. The value at array position j of the returned
186
201
  * CDF array is the sum of the returned values in positions 0 through j of the returned PMF
187
- * array.
202
+ * array. This can be viewed as an array of ranks of the given split points plus one more value
203
+ * that is always 1.
188
204
  */
189
- template<bool inclusive = false>
190
- vector_double get_CDF(const T* split_points, uint32_t size) const;
205
+ vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
191
206
 
192
207
  /**
193
208
  * Returns an approximate quantile of the given normalized rank.
194
209
  * The normalized rank must be in the range [0.0, 1.0] (both inclusive).
195
- * @param rank the given normalized rank
196
- * @return approximate quantile given the normalized rank
210
+ * <p>If the sketch is empty this throws std::runtime_error.
211
+ *
212
+ * @param rank of an item in the hypothetical sorted stream.
213
+ * @param inclusive if true, the given rank is considered inclusive (includes weight of an item)
214
+ *
215
+ * @return approximate quantile associated with the given rank
197
216
  */
198
- using quantile_return_type = typename quantile_sketch_sorted_view<T, Comparator, Allocator>::quantile_return_type;
199
- template<bool inclusive = false>
200
- quantile_return_type get_quantile(double rank) const;
217
+ using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
218
+ quantile_return_type get_quantile(double rank, bool inclusive = true) const;
201
219
 
202
220
  /**
203
221
  * Returns an array of quantiles that correspond to the given array of normalized ranks.
222
+ * <p>If the sketch is empty this throws std::runtime_error.
223
+ *
204
224
  * @param ranks given array of normalized ranks.
225
+ * @param size the number of ranks in the array.
226
+ *
205
227
  * @return array of quantiles that correspond to the given array of normalized ranks
228
+ *
229
+ * Deprecated. Will be removed in the next major version. Use get_quantile() instead.
206
230
  */
207
- template<bool inclusive = false>
208
- std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size) const;
231
+ std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
209
232
 
210
233
  /**
211
- * Returns an approximate lower bound of the given noramalized rank.
234
+ * Returns an approximate lower bound of the given normalized rank.
212
235
  * @param rank the given rank, a value between 0 and 1.0.
213
236
  * @param num_std_dev the number of standard deviations. Must be 1, 2, or 3.
214
237
  * @return an approximate lower bound rank.
@@ -216,7 +239,7 @@ public:
216
239
  double get_rank_lower_bound(double rank, uint8_t num_std_dev) const;
217
240
 
218
241
  /**
219
- * Returns an approximate upper bound of the given noramalized rank.
242
+ * Returns an approximate upper bound of the given normalized rank.
220
243
  * @param rank the given rank, a value between 0 and 1.0.
221
244
  * @param num_std_dev the number of standard deviations. Must be 1, 2, or 3.
222
245
  * @return an approximate upper bound rank.
@@ -239,27 +262,27 @@ public:
239
262
  /**
240
263
  * Computes size needed to serialize the current state of the sketch.
241
264
  * This version is for fixed-size arithmetic types (integral and floating point).
242
- * @param instance of a SerDe
265
+ * @param sd instance of a SerDe
243
266
  * @return size in bytes needed to serialize this sketch
244
267
  */
245
- template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
268
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
246
269
  size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
247
270
 
248
271
  /**
249
272
  * Computes size needed to serialize the current state of the sketch.
250
273
  * This version is for all other types and can be expensive since every item needs to be looked at.
251
- * @param instance of a SerDe
274
+ * @param sd instance of a SerDe
252
275
  * @return size in bytes needed to serialize this sketch
253
276
  */
254
- template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
277
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
255
278
  size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
256
279
 
257
280
  /**
258
281
  * This method serializes the sketch into a given stream in a binary form
259
282
  * @param os output stream
260
- * @param instance of a SerDe
283
+ * @param sd instance of a SerDe
261
284
  */
262
- template<typename SerDe = S>
285
+ template<typename SerDe = serde<T>>
263
286
  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
264
287
 
265
288
  // This is a convenience alias for users
@@ -272,52 +295,35 @@ public:
272
295
  * It is a blank space of a given size.
273
296
  * This header is used in Datasketches PostgreSQL extension.
274
297
  * @param header_size_bytes space to reserve in front of the sketch
275
- * @param instance of a SerDe
298
+ * @param sd instance of a SerDe
276
299
  */
277
- template<typename SerDe = S>
300
+ template<typename SerDe = serde<T>>
278
301
  vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
279
302
 
280
303
  /**
281
304
  * This method deserializes a sketch from a given stream.
282
305
  * @param is input stream
283
- * @param instance of an Allocator
284
- * @return an instance of a sketch
285
- *
286
- * Deprecated, to be removed in the next major version
287
- */
288
- static req_sketch deserialize(std::istream& is, const Allocator& allocator = Allocator());
289
-
290
- /**
291
- * This method deserializes a sketch from a given stream.
292
- * @param is input stream
293
- * @param instance of a SerDe
294
- * @param instance of an Allocator
295
- * @return an instance of a sketch
296
- */
297
- template<typename SerDe = S>
298
- static req_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
299
-
300
- /**
301
- * This method deserializes a sketch from a given array of bytes.
302
- * @param bytes pointer to the array of bytes
303
- * @param size the size of the array
304
- * @param instance of an Allocator
306
+ * @param sd instance of a SerDe
307
+ * @param comparator instance of a Comparator
308
+ * @param allocator instance of an Allocator
305
309
  * @return an instance of a sketch
306
- *
307
- * Deprecated, to be removed in the next major version
308
310
  */
309
- static req_sketch deserialize(const void* bytes, size_t size, const Allocator& allocator = Allocator());
311
+ template<typename SerDe = serde<T>>
312
+ static req_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(),
313
+ const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
310
314
 
311
315
  /**
312
316
  * This method deserializes a sketch from a given array of bytes.
313
317
  * @param bytes pointer to the array of bytes
314
318
  * @param size the size of the array
315
- * @param instance of a SerDe
316
- * @param instance of an Allocator
319
+ * @param sd instance of a SerDe
320
+ * @param comparator instance of a Comparator
321
+ * @param allocator instance of an Allocator
317
322
  * @return an instance of a sketch
318
323
  */
319
- template<typename SerDe = S>
320
- static req_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
324
+ template<typename SerDe = serde<T>>
325
+ static req_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(),
326
+ const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
321
327
 
322
328
  /**
323
329
  * Prints a summary of the sketch.
@@ -330,10 +336,10 @@ public:
330
336
  const_iterator begin() const;
331
337
  const_iterator end() const;
332
338
 
333
- template<bool inclusive = false>
334
- quantile_sketch_sorted_view<T, Comparator, Allocator> get_sorted_view(bool cumulative) const;
339
+ quantiles_sorted_view<T, Comparator, Allocator> get_sorted_view() const;
335
340
 
336
341
  private:
342
+ Comparator comparator_;
337
343
  Allocator allocator_;
338
344
  uint16_t k_;
339
345
  bool hra_;
@@ -341,8 +347,12 @@ private:
341
347
  uint32_t num_retained_;
342
348
  uint64_t n_;
343
349
  std::vector<Compactor, AllocCompactor> compactors_;
344
- T* min_value_;
345
- T* max_value_;
350
+ T* min_item_;
351
+ T* max_item_;
352
+ mutable quantiles_sorted_view<T, Comparator, Allocator>* sorted_view_;
353
+
354
+ void setup_sorted_view() const; // modifies mutable state
355
+ void reset_sorted_view();
346
356
 
347
357
  static const bool LAZY_COMPRESSION = false;
348
358
 
@@ -366,75 +376,44 @@ private:
366
376
 
367
377
  // for deserialization
368
378
  class item_deleter;
369
- req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);
379
+ req_sketch(uint16_t k, bool hra, uint64_t n,
380
+ std::unique_ptr<T, item_deleter> min_item, std::unique_ptr<T, item_deleter> max_item,
381
+ std::vector<Compactor, AllocCompactor>&& compactors, const Comparator& comparator);
370
382
 
371
383
  static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);
372
384
  static void check_serial_version(uint8_t serial_version);
373
385
  static void check_family_id(uint8_t family_id);
374
386
 
375
- // implementations for floating point types
376
387
  template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
377
- static const TT& get_invalid_value() {
378
- static TT value = std::numeric_limits<TT>::quiet_NaN();
379
- return value;
388
+ static inline bool check_update_item(const TT& item) {
389
+ return !std::isnan(item);
380
390
  }
381
391
 
382
- template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
383
- static inline bool check_update_value(const TT& value) {
384
- return !std::isnan(value);
385
- }
386
-
387
- template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
388
- static inline void check_split_points(const T* values, uint32_t size) {
389
- for (uint32_t i = 0; i < size ; i++) {
390
- if (std::isnan(values[i])) {
391
- throw std::invalid_argument("Values must not be NaN");
392
- }
393
- if ((i < (size - 1)) && !(Comparator()(values[i], values[i + 1]))) {
394
- throw std::invalid_argument("Values must be unique and monotonically increasing");
395
- }
396
- }
397
- }
398
-
399
- // implementations for all other types
400
392
  template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
401
- static const TT& get_invalid_value() {
402
- throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of values");
403
- }
404
-
405
- template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
406
- static inline bool check_update_value(const TT&) {
393
+ static inline bool check_update_item(const TT&) {
407
394
  return true;
408
395
  }
409
396
 
410
- template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
411
- static inline void check_split_points(const T* values, uint32_t size) {
412
- for (uint32_t i = 0; i < size ; i++) {
413
- if ((i < (size - 1)) && !(Comparator()(values[i], values[i + 1]))) {
414
- throw std::invalid_argument("Values must be unique and monotonically increasing");
415
- }
416
- }
417
- }
418
-
419
397
  // for type converting constructor
420
- template<typename TT, typename CC, typename SS, typename AA>
421
- friend class req_sketch;
398
+ template<typename TT, typename CC, typename AA> friend class req_sketch;
422
399
  };
423
400
 
424
- template<typename T, typename C, typename S, typename A>
425
- class req_sketch<T, C, S, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
401
+ template<typename T, typename C, typename A>
402
+ class req_sketch<T, C, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
426
403
  public:
404
+ using value_type = std::pair<const T&, const uint64_t>;
427
405
  const_iterator& operator++();
428
406
  const_iterator& operator++(int);
429
407
  bool operator==(const const_iterator& other) const;
430
408
  bool operator!=(const const_iterator& other) const;
431
- std::pair<const T&, const uint64_t> operator*() const;
409
+ const value_type operator*() const;
410
+ const return_value_holder<value_type> operator->() const;
432
411
  private:
433
412
  using LevelsIterator = typename std::vector<Compactor, AllocCompactor>::const_iterator;
434
413
  LevelsIterator levels_it_;
435
414
  LevelsIterator levels_end_;
436
415
  const T* compactor_it_;
437
- friend class req_sketch<T, C, S, A>;
416
+ friend class req_sketch<T, C, A>;
438
417
  const_iterator(LevelsIterator begin, LevelsIterator end);
439
418
  };
440
419