datasketches 0.2.6 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (121) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE +4 -6
  4. data/NOTICE +6 -5
  5. data/ext/datasketches/kll_wrapper.cpp +20 -20
  6. data/ext/datasketches/theta_wrapper.cpp +2 -2
  7. data/lib/datasketches/version.rb +1 -1
  8. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  9. data/vendor/datasketches-cpp/LICENSE +4 -6
  10. data/vendor/datasketches-cpp/MANIFEST.in +21 -4
  11. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  12. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  13. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  14. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  15. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  16. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  17. data/vendor/datasketches-cpp/common/{test/test_runner.cpp → include/version.hpp.in} +15 -8
  18. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +37 -7
  19. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +22 -1
  20. data/vendor/datasketches-cpp/common/test/integration_test.cpp +1 -1
  21. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  22. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +1 -1
  24. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +1 -1
  25. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -1
  26. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  27. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  28. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  29. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  30. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +17 -10
  31. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  32. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +55 -42
  33. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -1
  34. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +4 -4
  35. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  36. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  37. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  38. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -1
  39. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +1 -1
  40. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +1 -1
  41. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -1
  42. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +1 -1
  43. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +1 -1
  44. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +1 -1
  45. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -1
  46. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +1 -1
  47. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  48. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  49. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  50. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  51. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +27 -27
  52. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +197 -233
  53. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +42 -32
  54. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  55. data/vendor/datasketches-cpp/pyproject.toml +17 -13
  56. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  57. data/vendor/datasketches-cpp/python/README.md +1 -1
  58. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  59. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  60. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  61. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  62. data/vendor/datasketches-cpp/python/pybind11Path.cmd +19 -1
  63. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  64. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  65. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  66. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  67. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  68. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  69. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  70. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  71. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  72. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  73. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +1 -1
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +20 -19
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +241 -233
  80. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +27 -27
  86. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +117 -104
  87. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  88. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  89. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  90. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  91. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  92. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +7 -7
  93. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +3 -3
  94. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +5 -5
  95. data/vendor/datasketches-cpp/setup.py +14 -3
  96. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  97. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  98. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  99. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  100. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +1 -1
  101. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +1 -1
  102. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +1 -1
  103. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -1
  104. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +3 -2
  105. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +1 -1
  106. data/vendor/datasketches-cpp/tox.ini +26 -0
  107. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  108. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +41 -35
  109. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  110. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  111. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  112. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -1
  113. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -1
  114. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +1 -1
  115. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  116. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +27 -1
  117. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -1
  118. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  119. metadata +14 -7
  120. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
  121. data/vendor/datasketches-cpp/common/test/catch.hpp +0 -17618
@@ -51,7 +51,7 @@ struct subset_summary {
51
51
  double total_sketch_weight;
52
52
  };
53
53
 
54
- template <typename T, typename S, typename A> class var_opt_union; // forward declaration
54
+ template <typename T, typename A> class var_opt_union; // forward declaration
55
55
 
56
56
  namespace var_opt_constants {
57
57
  const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
@@ -60,7 +60,6 @@ namespace var_opt_constants {
60
60
 
61
61
  template<
62
62
  typename T,
63
- typename S = serde<T>, // deprecated, to be removed in the next major version
64
63
  typename A = std::allocator<T>
65
64
  >
66
65
  class var_opt_sketch {
@@ -142,7 +141,7 @@ class var_opt_sketch {
142
141
  * @param instance of a SerDe
143
142
  * @return size in bytes needed to serialize this sketch
144
143
  */
145
- template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
144
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
146
145
  inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
147
146
 
148
147
  /**
@@ -151,7 +150,7 @@ class var_opt_sketch {
151
150
  * @param instance of a SerDe
152
151
  * @return size in bytes needed to serialize this sketch
153
152
  */
154
- template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
153
+ template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
155
154
  inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
156
155
 
157
156
  // This is a convenience alias for users
@@ -166,7 +165,7 @@ class var_opt_sketch {
166
165
  * @param header_size_bytes space to reserve in front of the sketch
167
166
  * @param instance of a SerDe
168
167
  */
169
- template<typename SerDe = S>
168
+ template<typename SerDe = serde<T>>
170
169
  vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
171
170
 
172
171
  /**
@@ -174,19 +173,9 @@ class var_opt_sketch {
174
173
  * @param os output stream
175
174
  * @param instance of a SerDe
176
175
  */
177
- template<typename SerDe = S>
176
+ template<typename SerDe = serde<T>>
178
177
  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
179
178
 
180
- /**
181
- * This method deserializes a sketch from a given stream.
182
- * @param is input stream
183
- * @param instance of an Allocator
184
- * @return an instance of a sketch
185
- *
186
- * Deprecated, to be removed in the next major version
187
- */
188
- static var_opt_sketch deserialize(std::istream& is, const A& allocator = A());
189
-
190
179
  /**
191
180
  * This method deserializes a sketch from a given stream.
192
181
  * @param is input stream
@@ -194,20 +183,9 @@ class var_opt_sketch {
194
183
  * @param instance of an Allocator
195
184
  * @return an instance of a sketch
196
185
  */
197
- template<typename SerDe = S>
186
+ template<typename SerDe = serde<T>>
198
187
  static var_opt_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
199
188
 
200
- /**
201
- * This method deserializes a sketch from a given array of bytes.
202
- * @param bytes pointer to the array of bytes
203
- * @param size the size of the array
204
- * @param instance of an Allocator
205
- * @return an instance of a sketch
206
- *
207
- * Deprecated, to be removed in the next major version
208
- */
209
- static var_opt_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
210
-
211
189
  /**
212
190
  * This method deserializes a sketch from a given array of bytes.
213
191
  * @param bytes pointer to the array of bytes
@@ -216,7 +194,7 @@ class var_opt_sketch {
216
194
  * @param instance of an Allocator
217
195
  * @return an instance of a sketch
218
196
  */
219
- template<typename SerDe = S>
197
+ template<typename SerDe = serde<T>>
220
198
  static var_opt_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
221
199
 
222
200
  /**
@@ -303,10 +281,9 @@ class var_opt_sketch {
303
281
  std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
304
282
  std::unique_ptr<bool, marks_deleter> marks, const A& allocator);
305
283
 
306
- friend class var_opt_union<T,S,A>;
284
+ friend class var_opt_union<T, A>;
307
285
  var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n);
308
- var_opt_sketch(T* data, double* weights, size_t len, uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator);
309
-
286
+
310
287
  string<A> items_to_string(bool print_gap) const;
311
288
 
312
289
  // internal-use-only update
@@ -368,8 +345,8 @@ class var_opt_sketch {
368
345
  class iterator;
369
346
  };
370
347
 
371
- template<typename T, typename S, typename A>
372
- class var_opt_sketch<T, S, A>::const_iterator : public std::iterator<std::input_iterator_tag, T> {
348
+ template<typename T, typename A>
349
+ class var_opt_sketch<T, A>::const_iterator : public std::iterator<std::input_iterator_tag, T> {
373
350
  public:
374
351
  const_iterator(const const_iterator& other);
375
352
  const_iterator& operator++();
@@ -379,29 +356,28 @@ public:
379
356
  const std::pair<const T&, const double> operator*() const;
380
357
 
381
358
  private:
382
- friend class var_opt_sketch<T,S,A>;
383
- friend class var_opt_union<T,S,A>;
359
+ friend class var_opt_sketch<T, A>;
360
+ friend class var_opt_union<T, A>;
384
361
 
385
362
  // default iterator over full sketch
386
- const_iterator(const var_opt_sketch<T,S,A>& sk, bool is_end);
363
+ const_iterator(const var_opt_sketch<T, A>& sk, bool is_end);
387
364
 
388
365
  // iterates over only one of the H or R region, optionally applying weight correction
389
366
  // to R region (can correct for numerical precision issues)
390
- const_iterator(const var_opt_sketch<T,S,A>& sk, bool is_end, bool use_r_region);
367
+ const_iterator(const var_opt_sketch<T, A>& sk, bool is_end, bool use_r_region);
391
368
 
392
369
  bool get_mark() const;
393
370
 
394
- const var_opt_sketch<T,S,A>* sk_;
371
+ const var_opt_sketch<T, A>* sk_;
395
372
  double cum_r_weight_; // used for weight correction
396
373
  double r_item_wt_;
397
374
  size_t idx_;
398
375
  const size_t final_idx_;
399
- // bool weight_correction_;
400
376
  };
401
377
 
402
378
  // non-const iterator for internal use
403
- template<typename T, typename S, typename A>
404
- class var_opt_sketch<T, S, A>::iterator : public std::iterator<std::input_iterator_tag, T> {
379
+ template<typename T, typename A>
380
+ class var_opt_sketch<T, A>::iterator : public std::iterator<std::input_iterator_tag, T> {
405
381
  public:
406
382
  iterator(const iterator& other);
407
383
  iterator& operator++();
@@ -411,16 +387,16 @@ public:
411
387
  std::pair<T&, double> operator*();
412
388
 
413
389
  private:
414
- friend class var_opt_sketch<T,S,A>;
415
- friend class var_opt_union<T,S,A>;
390
+ friend class var_opt_sketch<T, A>;
391
+ friend class var_opt_union<T, A>;
416
392
 
417
393
  // iterates over only one of the H or R region, applying weight correction
418
394
  // if iterating over R region (can correct for numerical precision issues)
419
- iterator(const var_opt_sketch<T,S,A>& sk, bool is_end, bool use_r_region);
395
+ iterator(const var_opt_sketch<T, A>& sk, bool is_end, bool use_r_region);
420
396
 
421
397
  bool get_mark() const;
422
398
 
423
- const var_opt_sketch<T,S,A>* sk_;
399
+ const var_opt_sketch<T, A>* sk_;
424
400
  double cum_r_weight_; // used for weight correction
425
401
  double r_item_wt_;
426
402
  size_t idx_;