datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -31,7 +31,8 @@ typename Allocator
31
31
  >
32
32
  class req_compactor {
33
33
  public:
34
- req_compactor(bool hra, uint8_t lg_weight, uint32_t section_size, const Allocator& allocator, bool sorted = true);
34
+ req_compactor(bool hra, uint8_t lg_weight, uint32_t section_size, const Comparator& comparator,
35
+ const Allocator& allocator, bool sorted = true);
35
36
  ~req_compactor();
36
37
  req_compactor(const req_compactor& other);
37
38
  req_compactor(req_compactor&& other) noexcept;
@@ -39,7 +40,7 @@ public:
39
40
  req_compactor& operator=(req_compactor&& other);
40
41
 
41
42
  template<typename TT, typename CC, typename AA>
42
- req_compactor(const req_compactor<TT, CC, AA>& other, const Allocator& allocator);
43
+ req_compactor(const req_compactor<TT, CC, AA>& other, const Comparator& comparator, const Allocator& allocator);
43
44
 
44
45
  bool is_sorted() const;
45
46
  uint32_t get_num_items() const;
@@ -50,8 +51,7 @@ public:
50
51
  T* begin();
51
52
  T* end();
52
53
 
53
- template<bool inclusive>
54
- uint64_t compute_weight(const T& item) const;
54
+ uint64_t compute_weight(const T& item, bool inclusive) const;
55
55
 
56
56
  template<typename FwdT>
57
57
  void append(FwdT&& item);
@@ -86,18 +86,23 @@ public:
86
86
  size_t serialize(void* dst, size_t capacity, const S& serde) const;
87
87
 
88
88
  template<typename S>
89
- static req_compactor deserialize(std::istream& is, const S& serde, const Allocator& allocator, bool sorted, bool hra);
89
+ static req_compactor deserialize(std::istream& is, const S& serde, const Comparator& comparator,
90
+ const Allocator& allocator, bool sorted, bool hra);
90
91
 
91
92
  template<typename S>
92
- static std::pair<req_compactor, size_t> deserialize(const void* bytes, size_t size, const S& serde, const Allocator& allocator, bool sorted, bool hra);
93
+ static std::pair<req_compactor, size_t> deserialize(const void* bytes, size_t size, const S& serde,
94
+ const Comparator& comparator, const Allocator& allocator, bool sorted, bool hra);
93
95
 
94
96
  template<typename S>
95
- static req_compactor deserialize(std::istream& is, const S& serde, const Allocator& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra);
97
+ static req_compactor deserialize(std::istream& is, const S& serde, const Comparator& comparator,
98
+ const Allocator& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra);
96
99
 
97
100
  template<typename S>
98
- static std::pair<req_compactor, size_t> deserialize(const void* bytes, size_t size, const S& serde, const Allocator& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra);
101
+ static std::pair<req_compactor, size_t> deserialize(const void* bytes, size_t size, const S& serde,
102
+ const Comparator& comparator, const Allocator& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra);
99
103
 
100
104
  private:
105
+ Comparator comparator_;
101
106
  Allocator allocator_;
102
107
  uint8_t lg_weight_;
103
108
  bool hra_;
@@ -123,7 +128,8 @@ private:
123
128
 
124
129
  // for deserialization
125
130
  class items_deleter;
126
- req_compactor(bool hra, uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state, std::unique_ptr<T, items_deleter> items, uint32_t num_items, const Allocator& allocator);
131
+ req_compactor(bool hra, uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state,
132
+ std::unique_ptr<T, items_deleter> items, uint32_t num_items, const Comparator& comparator, const Allocator& allocator);
127
133
 
128
134
  template<typename S>
129
135
  static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator, uint32_t num);
@@ -33,7 +33,9 @@
33
33
  namespace datasketches {
34
34
 
35
35
  template<typename T, typename C, typename A>
36
- req_compactor<T, C, A>::req_compactor(bool hra, uint8_t lg_weight, uint32_t section_size, const A& allocator, bool sorted):
36
+ req_compactor<T, C, A>::req_compactor(bool hra, uint8_t lg_weight, uint32_t section_size,
37
+ const C& comparator, const A& allocator, bool sorted):
38
+ comparator_(comparator),
37
39
  allocator_(allocator),
38
40
  lg_weight_(lg_weight),
39
41
  hra_(hra),
@@ -58,6 +60,7 @@ req_compactor<T, C, A>::~req_compactor() {
58
60
 
59
61
  template<typename T, typename C, typename A>
60
62
  req_compactor<T, C, A>::req_compactor(const req_compactor& other):
63
+ comparator_(other.comparator_),
61
64
  allocator_(other.allocator_),
62
65
  lg_weight_(other.lg_weight_),
63
66
  hra_(other.hra_),
@@ -81,6 +84,7 @@ items_(nullptr)
81
84
 
82
85
  template<typename T, typename C, typename A>
83
86
  req_compactor<T, C, A>::req_compactor(req_compactor&& other) noexcept :
87
+ comparator_(std::move(other.comparator_)),
84
88
  allocator_(std::move(other.allocator_)),
85
89
  lg_weight_(other.lg_weight_),
86
90
  hra_(other.hra_),
@@ -100,6 +104,7 @@ items_(other.items_)
100
104
  template<typename T, typename C, typename A>
101
105
  req_compactor<T, C, A>& req_compactor<T, C, A>::operator=(const req_compactor& other) {
102
106
  req_compactor copy(other);
107
+ std::swap(comparator_, copy.comparator_);
103
108
  std::swap(allocator_, copy.allocator_);
104
109
  std::swap(lg_weight_, copy.lg_weight_);
105
110
  std::swap(hra_, copy.hra_);
@@ -117,6 +122,7 @@ req_compactor<T, C, A>& req_compactor<T, C, A>::operator=(const req_compactor& o
117
122
 
118
123
  template<typename T, typename C, typename A>
119
124
  req_compactor<T, C, A>& req_compactor<T, C, A>::operator=(req_compactor&& other) {
125
+ std::swap(comparator_, other.comparator_);
120
126
  std::swap(allocator_, other.allocator_);
121
127
  std::swap(lg_weight_, other.lg_weight_);
122
128
  std::swap(hra_, other.hra_);
@@ -134,7 +140,8 @@ req_compactor<T, C, A>& req_compactor<T, C, A>::operator=(req_compactor&& other)
134
140
 
135
141
  template<typename T, typename C, typename A>
136
142
  template<typename TT, typename CC, typename AA>
137
- req_compactor<T, C, A>::req_compactor(const req_compactor<TT, CC, AA>& other, const A& allocator):
143
+ req_compactor<T, C, A>::req_compactor(const req_compactor<TT, CC, AA>& other, const C& comparator, const A& allocator):
144
+ comparator_(comparator),
138
145
  allocator_(allocator),
139
146
  lg_weight_(other.lg_weight_),
140
147
  hra_(other.hra_),
@@ -153,7 +160,7 @@ items_(nullptr)
153
160
  const uint32_t from = hra_ ? capacity_ - num_items_ : 0;
154
161
  const uint32_t to = hra_ ? capacity_ : num_items_;
155
162
  for (uint32_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
156
- if (sorted_ && !std::is_sorted(items_ + from, items_ + to, C())) {
163
+ if (sorted_ && !std::is_sorted(items_ + from, items_ + to, comparator_)) {
157
164
  throw std::logic_error("items must be sorted");
158
165
  }
159
166
  }
@@ -180,12 +187,11 @@ uint8_t req_compactor<T, C, A>::get_lg_weight() const {
180
187
  }
181
188
 
182
189
  template<typename T, typename C, typename A>
183
- template<bool inclusive>
184
- uint64_t req_compactor<T, C, A>::compute_weight(const T& item) const {
190
+ uint64_t req_compactor<T, C, A>::compute_weight(const T& item, bool inclusive) const {
185
191
  if (!sorted_) const_cast<req_compactor*>(this)->sort(); // allow sorting as a side effect
186
192
  auto it = inclusive ?
187
- std::upper_bound(begin(), end(), item, C()) :
188
- std::lower_bound(begin(), end(), item, C());
193
+ std::upper_bound(begin(), end(), item, comparator_) :
194
+ std::lower_bound(begin(), end(), item, comparator_);
189
195
  return std::distance(begin(), it) << lg_weight_;
190
196
  }
191
197
 
@@ -251,7 +257,7 @@ void req_compactor<T, C, A>::merge(FwdC&& other) {
251
257
  auto to = from + other.get_num_items();
252
258
  auto other_it = other.begin();
253
259
  for (auto it = from; it != to; ++it, ++other_it) new (it) T(conditional_forward<FwdC>(*other_it));
254
- if (!other.sorted_) std::sort(from, to, C());
260
+ if (!other.sorted_) std::sort(from, to, comparator_);
255
261
  if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(), items_ + offset, hra_ ? end() : to, C());
256
262
  num_items_ += other.get_num_items();
257
263
  }
@@ -259,7 +265,7 @@ void req_compactor<T, C, A>::merge(FwdC&& other) {
259
265
  template<typename T, typename C, typename A>
260
266
  void req_compactor<T, C, A>::sort() {
261
267
  if (!sorted_) {
262
- std::sort(begin(), end(), C());
268
+ std::sort(begin(), end(), comparator_);
263
269
  sorted_ = true;
264
270
  }
265
271
  }
@@ -281,7 +287,7 @@ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& nex
281
287
  auto next_empty = hra_ ? next.begin() - num : next.end();
282
288
  promote_evens_or_odds(begin() + compaction_range.first, begin() + compaction_range.second, coin_, next_empty);
283
289
  next.num_items_ += num;
284
- std::inplace_merge(next.begin(), next_middle, next.end(), C());
290
+ std::inplace_merge(next.begin(), next_middle, next.end(), comparator_);
285
291
  for (size_t i = compaction_range.first; i < compaction_range.second; ++i) (*(begin() + i)).~T();
286
292
  num_items_ -= compaction_range.second - compaction_range.first;
287
293
 
@@ -389,7 +395,8 @@ size_t req_compactor<T, C, A>::serialize(void* dst, size_t capacity, const S& se
389
395
 
390
396
  template<typename T, typename C, typename A>
391
397
  template<typename S>
392
- req_compactor<T, C, A> req_compactor<T, C, A>::deserialize(std::istream& is, const S& serde, const A& allocator, bool sorted, bool hra) {
398
+ req_compactor<T, C, A> req_compactor<T, C, A>::deserialize(std::istream& is, const S& serde,
399
+ const C& comparator, const A& allocator, bool sorted, bool hra) {
393
400
  auto state = read<decltype(state_)>(is);
394
401
  auto section_size_raw = read<decltype(section_size_raw_)>(is);
395
402
  auto lg_weight = read<decltype(lg_weight_)>(is);
@@ -397,14 +404,17 @@ req_compactor<T, C, A> req_compactor<T, C, A>::deserialize(std::istream& is, con
397
404
  read<uint16_t>(is); // padding
398
405
  auto num_items = read<uint32_t>(is);
399
406
  auto items = deserialize_items(is, serde, allocator, num_items);
400
- return req_compactor(hra, lg_weight, sorted, section_size_raw, num_sections, state, std::move(items), num_items, allocator);
407
+ return req_compactor(hra, lg_weight, sorted, section_size_raw, num_sections, state, std::move(items), num_items,
408
+ comparator, allocator);
401
409
  }
402
410
 
403
411
  template<typename T, typename C, typename A>
404
412
  template<typename S>
405
- req_compactor<T, C, A> req_compactor<T, C, A>::deserialize(std::istream& is, const S& serde, const A& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra) {
413
+ req_compactor<T, C, A> req_compactor<T, C, A>::deserialize(std::istream& is, const S& serde,
414
+ const C& comparator, const A& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra) {
406
415
  auto items = deserialize_items(is, serde, allocator, num_items);
407
- return req_compactor(hra, 0, sorted, k, req_constants::INIT_NUM_SECTIONS, 0, std::move(items), num_items, allocator);
416
+ return req_compactor(hra, 0, sorted, k, req_constants::INIT_NUM_SECTIONS, 0, std::move(items), num_items,
417
+ comparator, allocator);
408
418
  }
409
419
 
410
420
  template<typename T, typename C, typename A>
@@ -422,7 +432,8 @@ auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde,
422
432
 
423
433
  template<typename T, typename C, typename A>
424
434
  template<typename S>
425
- std::pair<req_compactor<T, C, A>, size_t> req_compactor<T, C, A>::deserialize(const void* bytes, size_t size, const S& serde, const A& allocator, bool sorted, bool hra) {
435
+ std::pair<req_compactor<T, C, A>, size_t> req_compactor<T, C, A>::deserialize(const void* bytes, size_t size,
436
+ const S& serde, const C& comparator, const A& allocator, bool sorted, bool hra) {
426
437
  ensure_minimum_memory(size, 8);
427
438
  const char* ptr = static_cast<const char*>(bytes);
428
439
  const char* end_ptr = static_cast<const char*>(bytes) + size;
@@ -441,17 +452,20 @@ std::pair<req_compactor<T, C, A>, size_t> req_compactor<T, C, A>::deserialize(co
441
452
  auto pair = deserialize_items(ptr, end_ptr - ptr, serde, allocator, num_items);
442
453
  ptr += pair.second;
443
454
  return std::pair<req_compactor, size_t>(
444
- req_compactor(hra, lg_weight, sorted, section_size_raw, num_sections, state, std::move(pair.first), num_items, allocator),
455
+ req_compactor(hra, lg_weight, sorted, section_size_raw, num_sections, state, std::move(pair.first), num_items,
456
+ comparator, allocator),
445
457
  ptr - static_cast<const char*>(bytes)
446
458
  );
447
459
  }
448
460
 
449
461
  template<typename T, typename C, typename A>
450
462
  template<typename S>
451
- std::pair<req_compactor<T, C, A>, size_t> req_compactor<T, C, A>::deserialize(const void* bytes, size_t size, const S& serde, const A& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra) {
463
+ std::pair<req_compactor<T, C, A>, size_t> req_compactor<T, C, A>::deserialize(const void* bytes, size_t size,
464
+ const S& serde, const C& comparator, const A& allocator, bool sorted, uint16_t k, uint8_t num_items, bool hra) {
452
465
  auto pair = deserialize_items(bytes, size, serde, allocator, num_items);
453
466
  return std::pair<req_compactor, size_t>(
454
- req_compactor(hra, 0, sorted, k, req_constants::INIT_NUM_SECTIONS, 0, std::move(pair.first), num_items, allocator),
467
+ req_compactor(hra, 0, sorted, k, req_constants::INIT_NUM_SECTIONS, 0, std::move(pair.first), num_items,
468
+ comparator, allocator),
455
469
  pair.second
456
470
  );
457
471
  }
@@ -475,7 +489,9 @@ auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, c
475
489
 
476
490
 
477
491
  template<typename T, typename C, typename A>
478
- req_compactor<T, C, A>::req_compactor(bool hra, uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state, std::unique_ptr<T, items_deleter> items, uint32_t num_items, const A& allocator):
492
+ req_compactor<T, C, A>::req_compactor(bool hra, uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections,
493
+ uint64_t state, std::unique_ptr<T, items_deleter> items, uint32_t num_items, const C& comparator, const A& allocator):
494
+ comparator_(comparator),
479
495
  allocator_(allocator),
480
496
  lg_weight_(lg_weight),
481
497
  hra_(hra),