datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -51,7 +51,7 @@ struct subset_summary {
51
51
  double total_sketch_weight;
52
52
  };
53
53
 
54
- template <typename T, typename S, typename A> class var_opt_union; // forward declaration
54
+ template <typename T, typename A> class var_opt_union; // forward declaration
55
55
 
56
56
  namespace var_opt_constants {
57
57
  const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
@@ -60,7 +60,6 @@ namespace var_opt_constants {
60
60
 
61
61
  template<
62
62
  typename T,
63
- typename S = serde<T>, // deprecated, to be removed in the next major version
64
63
  typename A = std::allocator<T>
65
64
  >
66
65
  class var_opt_sketch {
@@ -142,7 +141,7 @@ class var_opt_sketch {
142
141
  * @param instance of a SerDe
143
142
  * @return size in bytes needed to serialize this sketch
144
143
  */
145
- template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
144
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
146
145
  inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
147
146
 
148
147
  /**
@@ -151,7 +150,7 @@ class var_opt_sketch {
151
150
  * @param instance of a SerDe
152
151
  * @return size in bytes needed to serialize this sketch
153
152
  */
154
- template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
153
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
155
154
  inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
156
155
 
157
156
  // This is a convenience alias for users
@@ -166,7 +165,7 @@ class var_opt_sketch {
166
165
  * @param header_size_bytes space to reserve in front of the sketch
167
166
  * @param instance of a SerDe
168
167
  */
169
- template<typename SerDe = S>
168
+ template<typename SerDe = serde<T>>
170
169
  vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
171
170
 
172
171
  /**
@@ -174,19 +173,9 @@ class var_opt_sketch {
174
173
  * @param os output stream
175
174
  * @param instance of a SerDe
176
175
  */
177
- template<typename SerDe = S>
176
+ template<typename SerDe = serde<T>>
178
177
  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
179
178
 
180
- /**
181
- * This method deserializes a sketch from a given stream.
182
- * @param is input stream
183
- * @param instance of an Allocator
184
- * @return an instance of a sketch
185
- *
186
- * Deprecated, to be removed in the next major version
187
- */
188
- static var_opt_sketch deserialize(std::istream& is, const A& allocator = A());
189
-
190
179
  /**
191
180
  * This method deserializes a sketch from a given stream.
192
181
  * @param is input stream
@@ -194,20 +183,9 @@ class var_opt_sketch {
194
183
  * @param instance of an Allocator
195
184
  * @return an instance of a sketch
196
185
  */
197
- template<typename SerDe = S>
186
+ template<typename SerDe = serde<T>>
198
187
  static var_opt_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
199
188
 
200
- /**
201
- * This method deserializes a sketch from a given array of bytes.
202
- * @param bytes pointer to the array of bytes
203
- * @param size the size of the array
204
- * @param instance of an Allocator
205
- * @return an instance of a sketch
206
- *
207
- * Deprecated, to be removed in the next major version
208
- */
209
- static var_opt_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
210
-
211
189
  /**
212
190
  * This method deserializes a sketch from a given array of bytes.
213
191
  * @param bytes pointer to the array of bytes
@@ -216,7 +194,7 @@ class var_opt_sketch {
216
194
  * @param instance of an Allocator
217
195
  * @return an instance of a sketch
218
196
  */
219
- template<typename SerDe = S>
197
+ template<typename SerDe = serde<T>>
220
198
  static var_opt_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
221
199
 
222
200
  /**
@@ -303,10 +281,9 @@ class var_opt_sketch {
303
281
  std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
304
282
  std::unique_ptr<bool, marks_deleter> marks, const A& allocator);
305
283
 
306
- friend class var_opt_union<T,S,A>;
284
+ friend class var_opt_union<T, A>;
307
285
  var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n);
308
- var_opt_sketch(T* data, double* weights, size_t len, uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator);
309
-
286
+
310
287
  string<A> items_to_string(bool print_gap) const;
311
288
 
312
289
  // internal-use-only update
@@ -368,8 +345,8 @@ class var_opt_sketch {
368
345
  class iterator;
369
346
  };
370
347
 
371
- template<typename T, typename S, typename A>
372
- class var_opt_sketch<T, S, A>::const_iterator : public std::iterator<std::input_iterator_tag, T> {
348
+ template<typename T, typename A>
349
+ class var_opt_sketch<T, A>::const_iterator : public std::iterator<std::input_iterator_tag, T> {
373
350
  public:
374
351
  const_iterator(const const_iterator& other);
375
352
  const_iterator& operator++();
@@ -379,29 +356,28 @@ public:
379
356
  const std::pair<const T&, const double> operator*() const;
380
357
 
381
358
  private:
382
- friend class var_opt_sketch<T,S,A>;
383
- friend class var_opt_union<T,S,A>;
359
+ friend class var_opt_sketch<T, A>;
360
+ friend class var_opt_union<T, A>;
384
361
 
385
362
  // default iterator over full sketch
386
- const_iterator(const var_opt_sketch<T,S,A>& sk, bool is_end);
363
+ const_iterator(const var_opt_sketch<T, A>& sk, bool is_end);
387
364
 
388
365
  // iterates over only one of the H or R region, optionally applying weight correction
389
366
  // to R region (can correct for numerical precision issues)
390
- const_iterator(const var_opt_sketch<T,S,A>& sk, bool is_end, bool use_r_region);
367
+ const_iterator(const var_opt_sketch<T, A>& sk, bool is_end, bool use_r_region);
391
368
 
392
369
  bool get_mark() const;
393
370
 
394
- const var_opt_sketch<T,S,A>* sk_;
371
+ const var_opt_sketch<T, A>* sk_;
395
372
  double cum_r_weight_; // used for weight correction
396
373
  double r_item_wt_;
397
374
  size_t idx_;
398
375
  const size_t final_idx_;
399
- // bool weight_correction_;
400
376
  };
401
377
 
402
378
  // non-const iterator for internal use
403
- template<typename T, typename S, typename A>
404
- class var_opt_sketch<T, S, A>::iterator : public std::iterator<std::input_iterator_tag, T> {
379
+ template<typename T, typename A>
380
+ class var_opt_sketch<T, A>::iterator : public std::iterator<std::input_iterator_tag, T> {
405
381
  public:
406
382
  iterator(const iterator& other);
407
383
  iterator& operator++();
@@ -411,16 +387,16 @@ public:
411
387
  std::pair<T&, double> operator*();
412
388
 
413
389
  private:
414
- friend class var_opt_sketch<T,S,A>;
415
- friend class var_opt_union<T,S,A>;
390
+ friend class var_opt_sketch<T, A>;
391
+ friend class var_opt_union<T, A>;
416
392
 
417
393
  // iterates over only one of the H or R region, applying weight correction
418
394
  // if iterating over R region (can correct for numerical precision issues)
419
- iterator(const var_opt_sketch<T,S,A>& sk, bool is_end, bool use_r_region);
395
+ iterator(const var_opt_sketch<T, A>& sk, bool is_end, bool use_r_region);
420
396
 
421
397
  bool get_mark() const;
422
398
 
423
- const var_opt_sketch<T,S,A>* sk_;
399
+ const var_opt_sketch<T, A>* sk_;
424
400
  double cum_r_weight_; // used for weight correction
425
401
  double r_item_wt_;
426
402
  size_t idx_;