datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -42,12 +42,12 @@ namespace datasketches {
42
42
  * author Kevin Lang
43
43
  * author Jon Malkin
44
44
  */
45
- template<typename T, typename S, typename A>
46
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, const A& allocator) :
47
- var_opt_sketch<T,S,A>(k, rf, false, allocator) {}
45
+ template<typename T, typename A>
46
+ var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, resize_factor rf, const A& allocator) :
47
+ var_opt_sketch(k, rf, false, allocator) {}
48
48
 
49
- template<typename T, typename S, typename A>
50
- var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
49
+ template<typename T, typename A>
50
+ var_opt_sketch<T, A>::var_opt_sketch(const var_opt_sketch& other) :
51
51
  k_(other.k_),
52
52
  h_(other.h_),
53
53
  m_(other.m_),
@@ -83,8 +83,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
83
83
  }
84
84
  }
85
85
 
86
- template<typename T, typename S, typename A>
87
- var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n) :
86
+ template<typename T, typename A>
87
+ var_opt_sketch<T, A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n) :
88
88
  k_(other.k_),
89
89
  h_(other.h_),
90
90
  m_(other.m_),
@@ -120,27 +120,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
120
120
  }
121
121
  }
122
122
 
123
- template<typename T, typename S, typename A>
124
- var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
125
- uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator) :
126
- k_(k),
127
- h_(h_count),
128
- m_(0),
129
- r_(r_count),
130
- n_(n),
131
- total_wt_r_(total_wt_r),
132
- rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
133
- curr_items_alloc_(len),
134
- filled_data_(n > k),
135
- allocator_(allocator),
136
- data_(data),
137
- weights_(weights),
138
- num_marks_in_h_(0),
139
- marks_(nullptr)
140
- {}
141
-
142
- template<typename T, typename S, typename A>
143
- var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
123
+ template<typename T, typename A>
124
+ var_opt_sketch<T, A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
144
125
  k_(other.k_),
145
126
  h_(other.h_),
146
127
  m_(other.m_),
@@ -161,8 +142,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
161
142
  other.marks_ = nullptr;
162
143
  }
163
144
 
164
- template<typename T, typename S, typename A>
165
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator) :
145
+ template<typename T, typename A>
146
+ var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator) :
166
147
  k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf), allocator_(allocator) {
167
148
  if (k == 0 || k_ > MAX_K) {
168
149
  throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
@@ -179,8 +160,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadg
179
160
  num_marks_in_h_ = 0;
180
161
  }
181
162
 
182
- template<typename T, typename S, typename A>
183
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
163
+ template<typename T, typename A>
164
+ var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
184
165
  uint32_t curr_items_alloc, bool filled_data, std::unique_ptr<T, items_deleter> items,
185
166
  std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
186
167
  std::unique_ptr<bool, marks_deleter> marks, const A& allocator) :
@@ -201,8 +182,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32
201
182
  {}
202
183
 
203
184
 
204
- template<typename T, typename S, typename A>
205
- var_opt_sketch<T,S,A>::~var_opt_sketch() {
185
+ template<typename T, typename A>
186
+ var_opt_sketch<T, A>::~var_opt_sketch() {
206
187
  if (data_ != nullptr) {
207
188
  if (filled_data_) {
208
189
  // destroy everything
@@ -232,9 +213,9 @@ var_opt_sketch<T,S,A>::~var_opt_sketch() {
232
213
  }
233
214
  }
234
215
 
235
- template<typename T, typename S, typename A>
236
- var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(const var_opt_sketch& other) {
237
- var_opt_sketch<T,S,A> sk_copy(other);
216
+ template<typename T, typename A>
217
+ var_opt_sketch<T, A>& var_opt_sketch<T, A>::operator=(const var_opt_sketch& other) {
218
+ var_opt_sketch sk_copy(other);
238
219
  std::swap(k_, sk_copy.k_);
239
220
  std::swap(h_, sk_copy.h_);
240
221
  std::swap(m_, sk_copy.m_);
@@ -252,8 +233,8 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(const var_opt_sketch& ot
252
233
  return *this;
253
234
  }
254
235
 
255
- template<typename T, typename S, typename A>
256
- var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other) {
236
+ template<typename T, typename A>
237
+ var_opt_sketch<T, A>& var_opt_sketch<T, A>::operator=(var_opt_sketch&& other) {
257
238
  std::swap(k_, other.k_);
258
239
  std::swap(h_, other.h_);
259
240
  std::swap(m_, other.m_);
@@ -311,9 +292,9 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other)
311
292
  */
312
293
 
313
294
  // implementation for fixed-size arithmetic types (integral and floating point)
314
- template<typename T, typename S, typename A>
295
+ template<typename T, typename A>
315
296
  template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
316
- size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe&) const {
297
+ size_t var_opt_sketch<T, A>::get_serialized_size_bytes(const SerDe&) const {
317
298
  if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
318
299
  size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
319
300
  num_bytes += h_ * sizeof(double); // weights
@@ -325,9 +306,9 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe&) const {
325
306
  }
326
307
 
327
308
  // implementation for all other types
328
- template<typename T, typename S, typename A>
309
+ template<typename T, typename A>
329
310
  template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
330
- size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
311
+ size_t var_opt_sketch<T, A>::get_serialized_size_bytes(const SerDe& sd) const {
331
312
  if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
332
313
  size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
333
314
  num_bytes += h_ * sizeof(double); // weights
@@ -340,9 +321,9 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
340
321
  return num_bytes;
341
322
  }
342
323
 
343
- template<typename T, typename S, typename A>
324
+ template<typename T, typename A>
344
325
  template<typename SerDe>
345
- std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
326
+ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
346
327
  const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
347
328
  std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
348
329
  uint8_t* ptr = bytes.data() + header_size_bytes;
@@ -414,9 +395,9 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
414
395
  return bytes;
415
396
  }
416
397
 
417
- template<typename T, typename S, typename A>
398
+ template<typename T, typename A>
418
399
  template<typename SerDe>
419
- void var_opt_sketch<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
400
+ void var_opt_sketch<T, A>::serialize(std::ostream& os, const SerDe& sd) const {
420
401
  const bool empty = (h_ == 0) && (r_ == 0);
421
402
 
422
403
  const uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
@@ -477,14 +458,9 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
477
458
  }
478
459
  }
479
460
 
480
- template<typename T, typename S, typename A>
481
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
482
- return deserialize(bytes, size, S(), allocator);
483
- }
484
-
485
- template<typename T, typename S, typename A>
461
+ template<typename T, typename A>
486
462
  template<typename SerDe>
487
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
463
+ var_opt_sketch<T, A> var_opt_sketch<T, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
488
464
  ensure_minimum_memory(size, 8);
489
465
  const char* ptr = static_cast<const char*>(bytes);
490
466
  const char* base = ptr;
@@ -510,7 +486,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
510
486
  const bool is_gadget = flags & GADGET_FLAG_MASK;
511
487
 
512
488
  if (is_empty) {
513
- return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
489
+ return var_opt_sketch(k, rf, is_gadget, allocator);
514
490
  }
515
491
 
516
492
  // second and third prelongs
@@ -578,14 +554,9 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
578
554
  std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
579
555
  }
580
556
 
581
- template<typename T, typename S, typename A>
582
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
583
- return deserialize(is, S(), allocator);
584
- }
585
-
586
- template<typename T, typename S, typename A>
557
+ template<typename T, typename A>
587
558
  template<typename SerDe>
588
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
559
+ var_opt_sketch<T, A> var_opt_sketch<T, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
589
560
  const auto first_byte = read<uint8_t>(is);
590
561
  uint8_t preamble_longs = first_byte & 0x3f;
591
562
  const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
@@ -604,7 +575,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
604
575
  if (!is.good())
605
576
  throw std::runtime_error("error reading from std::istream");
606
577
  else
607
- return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
578
+ return var_opt_sketch(k, rf, is_gadget, allocator);
608
579
  }
609
580
 
610
581
  // second and third prelongs
@@ -668,13 +639,13 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
668
639
  std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
669
640
  }
670
641
 
671
- template<typename T, typename S, typename A>
672
- bool var_opt_sketch<T,S,A>::is_empty() const {
642
+ template<typename T, typename A>
643
+ bool var_opt_sketch<T, A>::is_empty() const {
673
644
  return (h_ == 0 && r_ == 0);
674
645
  }
675
646
 
676
- template<typename T, typename S, typename A>
677
- void var_opt_sketch<T,S,A>::reset() {
647
+ template<typename T, typename A>
648
+ void var_opt_sketch<T, A>::reset() {
678
649
  const uint32_t prev_alloc = curr_items_alloc_;
679
650
  const uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
680
651
  const uint32_t initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, MIN_LG_ARR_ITEMS);
@@ -718,34 +689,34 @@ void var_opt_sketch<T,S,A>::reset() {
718
689
  filled_data_ = false;
719
690
  }
720
691
 
721
- template<typename T, typename S, typename A>
722
- uint64_t var_opt_sketch<T,S,A>::get_n() const {
692
+ template<typename T, typename A>
693
+ uint64_t var_opt_sketch<T, A>::get_n() const {
723
694
  return n_;
724
695
  }
725
696
 
726
- template<typename T, typename S, typename A>
727
- uint32_t var_opt_sketch<T,S,A>::get_k() const {
697
+ template<typename T, typename A>
698
+ uint32_t var_opt_sketch<T, A>::get_k() const {
728
699
  return k_;
729
700
  }
730
701
 
731
- template<typename T, typename S, typename A>
732
- uint32_t var_opt_sketch<T,S,A>::get_num_samples() const {
702
+ template<typename T, typename A>
703
+ uint32_t var_opt_sketch<T, A>::get_num_samples() const {
733
704
  const uint32_t num_in_sketch = h_ + r_;
734
705
  return (num_in_sketch < k_ ? num_in_sketch : k_);
735
706
  }
736
707
 
737
- template<typename T, typename S, typename A>
738
- void var_opt_sketch<T,S,A>::update(const T& item, double weight) {
708
+ template<typename T, typename A>
709
+ void var_opt_sketch<T, A>::update(const T& item, double weight) {
739
710
  update(item, weight, false);
740
711
  }
741
712
 
742
- template<typename T, typename S, typename A>
743
- void var_opt_sketch<T,S,A>::update(T&& item, double weight) {
713
+ template<typename T, typename A>
714
+ void var_opt_sketch<T, A>::update(T&& item, double weight) {
744
715
  update(std::move(item), weight, false);
745
716
  }
746
717
 
747
- template<typename T, typename S, typename A>
748
- string<A> var_opt_sketch<T,S,A>::to_string() const {
718
+ template<typename T, typename A>
719
+ string<A> var_opt_sketch<T, A>::to_string() const {
749
720
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
750
721
  // The stream does not support passing an allocator instance, and alternatives are complicated.
751
722
  std::ostringstream os;
@@ -760,8 +731,8 @@ string<A> var_opt_sketch<T,S,A>::to_string() const {
760
731
  return string<A>(os.str().c_str(), allocator_);
761
732
  }
762
733
 
763
- template<typename T, typename S, typename A>
764
- string<A> var_opt_sketch<T,S,A>::items_to_string() const {
734
+ template<typename T, typename A>
735
+ string<A> var_opt_sketch<T, A>::items_to_string() const {
765
736
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
766
737
  // The stream does not support passing an allocator instance, and alternatives are complicated.
767
738
  std::ostringstream os;
@@ -774,8 +745,8 @@ string<A> var_opt_sketch<T,S,A>::items_to_string() const {
774
745
  return string<A>(os.str().c_str(), allocator_);
775
746
  }
776
747
 
777
- template<typename T, typename S, typename A>
778
- string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
748
+ template<typename T, typename A>
749
+ string<A> var_opt_sketch<T, A>::items_to_string(bool print_gap) const {
779
750
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
780
751
  // The stream does not support passing an allocator instance, and alternatives are complicated.
781
752
  std::ostringstream os;
@@ -798,9 +769,9 @@ string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
798
769
  return string<A>(os.str().c_str(), allocator_);
799
770
  }
800
771
 
801
- template<typename T, typename S, typename A>
772
+ template<typename T, typename A>
802
773
  template<typename O>
803
- void var_opt_sketch<T,S,A>::update(O&& item, double weight, bool mark) {
774
+ void var_opt_sketch<T, A>::update(O&& item, double weight, bool mark) {
804
775
  if (weight < 0.0 || std::isnan(weight) || std::isinf(weight)) {
805
776
  throw std::invalid_argument("Item weights must be nonnegative and finite. Found: "
806
777
  + std::to_string(weight));
@@ -838,9 +809,9 @@ void var_opt_sketch<T,S,A>::update(O&& item, double weight, bool mark) {
838
809
  }
839
810
  }
840
811
 
841
- template<typename T, typename S, typename A>
812
+ template<typename T, typename A>
842
813
  template<typename O>
843
- void var_opt_sketch<T,S,A>::update_warmup_phase(O&& item, double weight, bool mark) {
814
+ void var_opt_sketch<T, A>::update_warmup_phase(O&& item, double weight, bool mark) {
844
815
  // seems overly cautious
845
816
  if (r_ > 0 || m_ != 0 || h_ > k_) throw std::logic_error("invalid sketch state during warmup");
846
817
 
@@ -868,14 +839,15 @@ void var_opt_sketch<T,S,A>::update_warmup_phase(O&& item, double weight, bool ma
868
839
  would appear to the right of the R items in a hypothetical reverse-sorted
869
840
  list. It is easy to prove that it is light enough to be part of this
870
841
  round's downsampling */
871
- template<typename T, typename S, typename A>
842
+ template<typename T, typename A>
872
843
  template<typename O>
873
- void var_opt_sketch<T,S,A>::update_light(O&& item, double weight, bool mark) {
844
+ void var_opt_sketch<T, A>::update_light(O&& item, double weight, bool mark) {
874
845
  if (r_ == 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during light warmup");
875
846
 
876
847
  const uint32_t m_slot = h_; // index of the gap, which becomes the M region
877
848
  if (filled_data_) {
878
- data_[m_slot] = std::forward<O>(item);
849
+ if (&data_[m_slot] != &item)
850
+ data_[m_slot] = std::forward<O>(item);
879
851
  } else {
880
852
  new (&data_[m_slot]) T(std::forward<O>(item));
881
853
  filled_data_ = true;
@@ -895,9 +867,9 @@ void var_opt_sketch<T,S,A>::update_light(O&& item, double weight, bool mark) {
895
867
  In other words, it might go into the heap and then come right back out,
896
868
  but that should be okay because pseudo_heavy items cannot predominate
897
869
  in long streams unless (max wt) / (min wt) > o(exp(N)) */
898
- template<typename T, typename S, typename A>
870
+ template<typename T, typename A>
899
871
  template<typename O>
900
- void var_opt_sketch<T,S,A>::update_heavy_general(O&& item, double weight, bool mark) {
872
+ void var_opt_sketch<T, A>::update_heavy_general(O&& item, double weight, bool mark) {
901
873
  if (r_ < 2 || m_ != 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during heavy general update");
902
874
 
903
875
  // put into H, although may come back out momentarily
@@ -909,9 +881,9 @@ void var_opt_sketch<T,S,A>::update_heavy_general(O&& item, double weight, bool m
909
881
  /* The analysis of this case is similar to that of the general heavy case.
910
882
  The one small technical difference is that since R < 2, we must grab an M item
911
883
  to have a valid starting point for continue_by_growing_candidate_set () */
912
- template<typename T, typename S, typename A>
884
+ template<typename T, typename A>
913
885
  template<typename O>
914
- void var_opt_sketch<T,S,A>::update_heavy_r_eq1(O&& item, double weight, bool mark) {
886
+ void var_opt_sketch<T, A>::update_heavy_r_eq1(O&& item, double weight, bool mark) {
915
887
  if (r_ != 1 || m_ != 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during heavy r=1 update");
916
888
 
917
889
  push(std::forward<O>(item), weight, mark); // new item into H
@@ -929,8 +901,8 @@ void var_opt_sketch<T,S,A>::update_heavy_r_eq1(O&& item, double weight, bool mar
929
901
  * <p>Subject to certain pre-conditions, decreasing k causes tau to increase. This fact is used by
930
902
  * the unioning algorithm to force "marked" items out of H and into the reservoir region.</p>
931
903
  */
932
- template<typename T, typename S, typename A>
933
- void var_opt_sketch<T,S,A>::decrease_k_by_1() {
904
+ template<typename T, typename A>
905
+ void var_opt_sketch<T, A>::decrease_k_by_1() {
934
906
  if (k_ <= 1) {
935
907
  throw std::logic_error("Cannot decrease k below 1 in union");
936
908
  }
@@ -952,9 +924,10 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
952
924
  // first, slide the R zone to the left by 1, temporarily filling the gap
953
925
  const uint32_t old_gap_idx = h_;
954
926
  const uint32_t old_final_r_idx = (h_ + 1 + r_) - 1;
955
- //if (old_final_r_idx != k_) throw std::logic_error("gadget in invalid state");
927
+ if (old_final_r_idx != k_) throw std::logic_error("gadget in invalid state");
956
928
 
957
929
  swap_values(old_final_r_idx, old_gap_idx);
930
+ filled_data_ = true; // we just filled the gap, and no need to check previous state
958
931
 
959
932
  // now we pull an item out of H; any item is ok, but if we grab the rightmost and then
960
933
  // reduce h_, the heap invariant will be preserved (and the gap will be restored), plus
@@ -987,8 +960,8 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
987
960
  }
988
961
  }
989
962
 
990
- template<typename T, typename S, typename A>
991
- void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_marks) {
963
+ template<typename T, typename A>
964
+ void var_opt_sketch<T, A>::allocate_data_arrays(uint32_t tgt_size, bool use_marks) {
992
965
  filled_data_ = false;
993
966
 
994
967
  data_ = allocator_.allocate(tgt_size);
@@ -1001,8 +974,8 @@ void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_mar
1001
974
  }
1002
975
  }
1003
976
 
1004
- template<typename T, typename S, typename A>
1005
- void var_opt_sketch<T,S,A>::grow_data_arrays() {
977
+ template<typename T, typename A>
978
+ void var_opt_sketch<T, A>::grow_data_arrays() {
1006
979
  const uint32_t prev_size = curr_items_alloc_;
1007
980
  curr_items_alloc_ = get_adjusted_size(k_, curr_items_alloc_ << rf_);
1008
981
  if (curr_items_alloc_ == k_) {
@@ -1038,8 +1011,8 @@ void var_opt_sketch<T,S,A>::grow_data_arrays() {
1038
1011
  }
1039
1012
  }
1040
1013
 
1041
- template<typename T, typename S, typename A>
1042
- void var_opt_sketch<T,S,A>::transition_from_warmup() {
1014
+ template<typename T, typename A>
1015
+ void var_opt_sketch<T, A>::transition_from_warmup() {
1043
1016
  // Move the 2 lightest items from H to M
1044
1017
  // But the lighter really belongs in R, so update counts to reflect that
1045
1018
  convert_to_heap();
@@ -1061,8 +1034,8 @@ void var_opt_sketch<T,S,A>::transition_from_warmup() {
1061
1034
  grow_candidate_set(weights_[k_ - 1] + total_wt_r_, 2);
1062
1035
  }
1063
1036
 
1064
- template<typename T, typename S, typename A>
1065
- void var_opt_sketch<T,S,A>::convert_to_heap() {
1037
+ template<typename T, typename A>
1038
+ void var_opt_sketch<T, A>::convert_to_heap() {
1066
1039
  if (h_ < 2) {
1067
1040
  return; // nothing to do
1068
1041
  }
@@ -1081,8 +1054,8 @@ void var_opt_sketch<T,S,A>::convert_to_heap() {
1081
1054
  //}
1082
1055
  }
1083
1056
 
1084
- template<typename T, typename S, typename A>
1085
- void var_opt_sketch<T,S,A>::restore_towards_leaves(uint32_t slot_in) {
1057
+ template<typename T, typename A>
1058
+ void var_opt_sketch<T, A>::restore_towards_leaves(uint32_t slot_in) {
1086
1059
  const uint32_t last_slot = h_ - 1;
1087
1060
  if (h_ == 0 || slot_in > last_slot) throw std::logic_error("invalid heap state");
1088
1061
 
@@ -1109,8 +1082,8 @@ void var_opt_sketch<T,S,A>::restore_towards_leaves(uint32_t slot_in) {
1109
1082
  }
1110
1083
  }
1111
1084
 
1112
- template<typename T, typename S, typename A>
1113
- void var_opt_sketch<T,S,A>::restore_towards_root(uint32_t slot_in) {
1085
+ template<typename T, typename A>
1086
+ void var_opt_sketch<T, A>::restore_towards_root(uint32_t slot_in) {
1114
1087
  uint32_t slot = slot_in;
1115
1088
  uint32_t p = (((slot + 1) / 2) - 1); // valid if slot >= 1
1116
1089
  while ((slot > 0) && (weights_[slot] < weights_[p])) {
@@ -1120,11 +1093,12 @@ void var_opt_sketch<T,S,A>::restore_towards_root(uint32_t slot_in) {
1120
1093
  }
1121
1094
  }
1122
1095
 
1123
- template<typename T, typename S, typename A>
1096
+ template<typename T, typename A>
1124
1097
  template<typename O>
1125
- void var_opt_sketch<T,S,A>::push(O&& item, double wt, bool mark) {
1098
+ void var_opt_sketch<T, A>::push(O&& item, double wt, bool mark) {
1126
1099
  if (filled_data_) {
1127
- data_[h_] = std::forward<O>(item);
1100
+ if (&data_[h_] != &item)
1101
+ data_[h_] = std::forward<O>(item);
1128
1102
  } else {
1129
1103
  new (&data_[h_]) T(std::forward<O>(item));
1130
1104
  filled_data_ = true;
@@ -1139,8 +1113,8 @@ void var_opt_sketch<T,S,A>::push(O&& item, double wt, bool mark) {
1139
1113
  restore_towards_root(h_ - 1); // need use old h_, but want accurate h_
1140
1114
  }
1141
1115
 
1142
- template<typename T, typename S, typename A>
1143
- void var_opt_sketch<T,S,A>::pop_min_to_m_region() {
1116
+ template<typename T, typename A>
1117
+ void var_opt_sketch<T, A>::pop_min_to_m_region() {
1144
1118
  if (h_ == 0 || (h_ + m_ + r_ != k_ + 1))
1145
1119
  throw std::logic_error("invalid heap state popping min to M region");
1146
1120
 
@@ -1164,8 +1138,8 @@ void var_opt_sketch<T,S,A>::pop_min_to_m_region() {
1164
1138
  }
1165
1139
 
1166
1140
 
1167
- template<typename T, typename S, typename A>
1168
- void var_opt_sketch<T,S,A>::swap_values(uint32_t src, uint32_t dst) {
1141
+ template<typename T, typename A>
1142
+ void var_opt_sketch<T, A>::swap_values(uint32_t src, uint32_t dst) {
1169
1143
  std::swap(data_[src], data_[dst]);
1170
1144
  std::swap(weights_[src], weights_[dst]);
1171
1145
 
@@ -1182,8 +1156,8 @@ void var_opt_sketch<T,S,A>::swap_values(uint32_t src, uint32_t dst) {
1182
1156
  of cands is at least 2. We will now grow the candidate set as much as possible
1183
1157
  by pulling sufficiently light items from h to m.
1184
1158
  */
1185
- template<typename T, typename S, typename A>
1186
- void var_opt_sketch<T,S,A>::grow_candidate_set(double wt_cands, uint32_t num_cands) {
1159
+ template<typename T, typename A>
1160
+ void var_opt_sketch<T, A>::grow_candidate_set(double wt_cands, uint32_t num_cands) {
1187
1161
  if ((h_ + m_ + r_ != k_ + 1) || (num_cands < 1) || (num_cands != m_ + r_) || (m_ >= 2))
1188
1162
  throw std::logic_error("invariant violated when growing candidate set");
1189
1163
 
@@ -1206,8 +1180,8 @@ void var_opt_sketch<T,S,A>::grow_candidate_set(double wt_cands, uint32_t num_can
1206
1180
  downsample_candidate_set(wt_cands, num_cands);
1207
1181
  }
1208
1182
 
1209
- template<typename T, typename S, typename A>
1210
- void var_opt_sketch<T,S,A>::downsample_candidate_set(double wt_cands, uint32_t num_cands) {
1183
+ template<typename T, typename A>
1184
+ void var_opt_sketch<T, A>::downsample_candidate_set(double wt_cands, uint32_t num_cands) {
1211
1185
  if (num_cands < 2 || h_ + num_cands != k_ + 1)
1212
1186
  throw std::logic_error("invalid num_cands when downsampling");
1213
1187
 
@@ -1225,17 +1199,16 @@ void var_opt_sketch<T,S,A>::downsample_candidate_set(double wt_cands, uint32_t n
1225
1199
  weights_[j] = -1.0;
1226
1200
  }
1227
1201
 
1228
- // The next two lines work even when delete_slot == leftmost_cand_slot
1202
+ // The next line works even when delete_slot == leftmost_cand_slot
1229
1203
  data_[delete_slot] = std::move(data_[leftmost_cand_slot]);
1230
- // cannot set data_[leftmost_cand_slot] to null since not uisng T*
1231
1204
 
1232
1205
  m_ = 0;
1233
1206
  r_ = num_cands - 1;
1234
1207
  total_wt_r_ = wt_cands;
1235
1208
  }
1236
1209
 
1237
- template<typename T, typename S, typename A>
1238
- uint32_t var_opt_sketch<T,S,A>::choose_delete_slot(double wt_cands, uint32_t num_cands) const {
1210
+ template<typename T, typename A>
1211
+ uint32_t var_opt_sketch<T, A>::choose_delete_slot(double wt_cands, uint32_t num_cands) const {
1239
1212
  if (r_ == 0) throw std::logic_error("choosing delete slot while in exact mode");
1240
1213
 
1241
1214
  if (m_ == 0) {
@@ -1262,8 +1235,8 @@ uint32_t var_opt_sketch<T,S,A>::choose_delete_slot(double wt_cands, uint32_t num
1262
1235
  }
1263
1236
  }
1264
1237
 
1265
- template<typename T, typename S, typename A>
1266
- uint32_t var_opt_sketch<T,S,A>::choose_weighted_delete_slot(double wt_cands, uint32_t num_cands) const {
1238
+ template<typename T, typename A>
1239
+ uint32_t var_opt_sketch<T, A>::choose_weighted_delete_slot(double wt_cands, uint32_t num_cands) const {
1267
1240
  if (m_ < 1) throw std::logic_error("must have weighted delete slot");
1268
1241
 
1269
1242
  const uint32_t offset = h_;
@@ -1286,8 +1259,8 @@ uint32_t var_opt_sketch<T,S,A>::choose_weighted_delete_slot(double wt_cands, uin
1286
1259
  return final_m + 1;
1287
1260
  }
1288
1261
 
1289
- template<typename T, typename S, typename A>
1290
- uint32_t var_opt_sketch<T,S,A>::pick_random_slot_in_r() const {
1262
+ template<typename T, typename A>
1263
+ uint32_t var_opt_sketch<T, A>::pick_random_slot_in_r() const {
1291
1264
  if (r_ == 0) throw std::logic_error("r_ = 0 when picking slot in R region");
1292
1265
  const uint32_t offset = h_ + m_;
1293
1266
  if (r_ == 1) {
@@ -1297,32 +1270,32 @@ uint32_t var_opt_sketch<T,S,A>::pick_random_slot_in_r() const {
1297
1270
  }
1298
1271
  }
1299
1272
 
1300
- template<typename T, typename S, typename A>
1301
- double var_opt_sketch<T,S,A>::peek_min() const {
1273
+ template<typename T, typename A>
1274
+ double var_opt_sketch<T, A>::peek_min() const {
1302
1275
  if (h_ == 0) throw std::logic_error("h_ = 0 when checking min in H region");
1303
1276
  return weights_[0];
1304
1277
  }
1305
1278
 
1306
- template<typename T, typename S, typename A>
1307
- inline bool var_opt_sketch<T,S,A>::is_marked(uint32_t idx) const {
1279
+ template<typename T, typename A>
1280
+ inline bool var_opt_sketch<T, A>::is_marked(uint32_t idx) const {
1308
1281
  return marks_ == nullptr ? false : marks_[idx];
1309
1282
  }
1310
1283
 
1311
- template<typename T, typename S, typename A>
1312
- double var_opt_sketch<T,S,A>::get_tau() const {
1284
+ template<typename T, typename A>
1285
+ double var_opt_sketch<T, A>::get_tau() const {
1313
1286
  return r_ == 0 ? std::nan("1") : (total_wt_r_ / r_);
1314
1287
  }
1315
1288
 
1316
- template<typename T, typename S, typename A>
1317
- void var_opt_sketch<T,S,A>::strip_marks() {
1289
+ template<typename T, typename A>
1290
+ void var_opt_sketch<T, A>::strip_marks() {
1318
1291
  if (marks_ == nullptr) throw std::logic_error("request to strip marks from non-gadget");
1319
1292
  num_marks_in_h_ = 0;
1320
1293
  AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
1321
1294
  marks_ = nullptr;
1322
1295
  }
1323
1296
 
1324
- template<typename T, typename S, typename A>
1325
- void var_opt_sketch<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
1297
+ template<typename T, typename A>
1298
+ void var_opt_sketch<T, A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
1326
1299
  const bool is_empty(flags & EMPTY_FLAG_MASK);
1327
1300
 
1328
1301
  if (is_empty) {
@@ -1342,8 +1315,8 @@ void var_opt_sketch<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t
1342
1315
  }
1343
1316
  }
1344
1317
 
1345
- template<typename T, typename S, typename A>
1346
- void var_opt_sketch<T,S,A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
1318
+ template<typename T, typename A>
1319
+ void var_opt_sketch<T, A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
1347
1320
  if (family_id == FAMILY_ID) {
1348
1321
  if (ser_ver != SER_VER) {
1349
1322
  throw std::invalid_argument("Possible corruption: VarOpt serialization version must be "
@@ -1357,8 +1330,8 @@ void var_opt_sketch<T,S,A>::check_family_and_serialization_version(uint8_t famil
1357
1330
  + std::to_string(FAMILY_ID) + ". Found: " + std::to_string(family_id));
1358
1331
  }
1359
1332
 
1360
- template<typename T, typename S, typename A>
1361
- uint32_t var_opt_sketch<T, S, A>::validate_and_get_target_size(uint32_t preamble_longs, uint32_t k, uint64_t n,
1333
+ template<typename T, typename A>
1334
+ uint32_t var_opt_sketch<T, A>::validate_and_get_target_size(uint32_t preamble_longs, uint32_t k, uint64_t n,
1362
1335
  uint32_t h, uint32_t r, resize_factor rf) {
1363
1336
  if (k == 0 || k > MAX_K) {
1364
1337
  throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
@@ -1403,9 +1376,9 @@ uint32_t var_opt_sketch<T, S, A>::validate_and_get_target_size(uint32_t preamble
1403
1376
  return array_size;
1404
1377
  }
1405
1378
 
1406
- template<typename T, typename S, typename A>
1379
+ template<typename T, typename A>
1407
1380
  template<typename P>
1408
- subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
1381
+ subset_summary var_opt_sketch<T, A>::estimate_subset_sum(P predicate) const {
1409
1382
  if (n_ == 0) {
1410
1383
  return {0.0, 0.0, 0.0, 0.0};
1411
1384
  }
@@ -1451,8 +1424,8 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
1451
1424
  };
1452
1425
  }
1453
1426
 
1454
- template<typename T, typename S, typename A>
1455
- class var_opt_sketch<T, S, A>::items_deleter {
1427
+ template<typename T, typename A>
1428
+ class var_opt_sketch<T, A>::items_deleter {
1456
1429
  public:
1457
1430
  items_deleter(uint32_t num, const A& allocator) : num(num), h_count(0), r_count(0), allocator(allocator) {}
1458
1431
  void set_h(uint32_t h) { h_count = h; }
@@ -1480,8 +1453,8 @@ class var_opt_sketch<T, S, A>::items_deleter {
1480
1453
  A allocator;
1481
1454
  };
1482
1455
 
1483
- template<typename T, typename S, typename A>
1484
- class var_opt_sketch<T, S, A>::weights_deleter {
1456
+ template<typename T, typename A>
1457
+ class var_opt_sketch<T, A>::weights_deleter {
1485
1458
  public:
1486
1459
  weights_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
1487
1460
  void operator() (double* ptr) {
@@ -1494,8 +1467,8 @@ class var_opt_sketch<T, S, A>::weights_deleter {
1494
1467
  AllocDouble allocator;
1495
1468
  };
1496
1469
 
1497
- template<typename T, typename S, typename A>
1498
- class var_opt_sketch<T, S, A>::marks_deleter {
1470
+ template<typename T, typename A>
1471
+ class var_opt_sketch<T, A>::marks_deleter {
1499
1472
  public:
1500
1473
  marks_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
1501
1474
  void operator() (bool* ptr) {
@@ -1509,20 +1482,20 @@ class var_opt_sketch<T, S, A>::marks_deleter {
1509
1482
  };
1510
1483
 
1511
1484
 
1512
- template<typename T, typename S, typename A>
1513
- typename var_opt_sketch<T, S, A>::const_iterator var_opt_sketch<T, S, A>::begin() const {
1514
- return var_opt_sketch<T, S, A>::const_iterator(*this, false);
1485
+ template<typename T, typename A>
1486
+ typename var_opt_sketch<T, A>::const_iterator var_opt_sketch<T, A>::begin() const {
1487
+ return const_iterator(*this, false);
1515
1488
  }
1516
1489
 
1517
- template<typename T, typename S, typename A>
1518
- typename var_opt_sketch<T, S, A>::const_iterator var_opt_sketch<T, S, A>::end() const {
1519
- return var_opt_sketch<T, S, A>::const_iterator(*this, true);
1490
+ template<typename T, typename A>
1491
+ typename var_opt_sketch<T, A>::const_iterator var_opt_sketch<T, A>::end() const {
1492
+ return const_iterator(*this, true);
1520
1493
  }
1521
1494
 
1522
1495
  // -------- var_opt_sketch::const_iterator implementation ---------
1523
1496
 
1524
- template<typename T, typename S, typename A>
1525
- var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A>& sk, bool is_end) :
1497
+ template<typename T, typename A>
1498
+ var_opt_sketch<T, A>::const_iterator::const_iterator(const var_opt_sketch& sk, bool is_end) :
1526
1499
  sk_(&sk),
1527
1500
  cum_r_weight_(0.0),
1528
1501
  r_item_wt_(sk.get_tau()),
@@ -1540,8 +1513,8 @@ var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A
1540
1513
  if (idx_ == final_idx_) { sk_ = nullptr; }
1541
1514
  }
1542
1515
 
1543
- template<typename T, typename S, typename A>
1544
- var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A>& sk, bool is_end, bool use_r_region) :
1516
+ template<typename T, typename A>
1517
+ var_opt_sketch<T, A>::const_iterator::const_iterator(const var_opt_sketch& sk, bool is_end, bool use_r_region) :
1545
1518
  sk_(&sk),
1546
1519
  cum_r_weight_(0.0),
1547
1520
  r_item_wt_(sk.get_tau()),
@@ -1559,8 +1532,8 @@ var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A
1559
1532
  }
1560
1533
 
1561
1534
 
1562
- template<typename T, typename S, typename A>
1563
- var_opt_sketch<T, S, A>::const_iterator::const_iterator(const const_iterator& other) :
1535
+ template<typename T, typename A>
1536
+ var_opt_sketch<T, A>::const_iterator::const_iterator(const const_iterator& other) :
1564
1537
  sk_(other.sk_),
1565
1538
  cum_r_weight_(other.cum_r_weight_),
1566
1539
  r_item_wt_(other.r_item_wt_),
@@ -1568,8 +1541,8 @@ var_opt_sketch<T, S, A>::const_iterator::const_iterator(const const_iterator& ot
1568
1541
  final_idx_(other.final_idx_)
1569
1542
  {}
1570
1543
 
1571
- template<typename T, typename S, typename A>
1572
- typename var_opt_sketch<T, S, A>::const_iterator& var_opt_sketch<T, S, A>::const_iterator::operator++() {
1544
+ template<typename T, typename A>
1545
+ typename var_opt_sketch<T, A>::const_iterator& var_opt_sketch<T, A>::const_iterator::operator++() {
1573
1546
  ++idx_;
1574
1547
 
1575
1548
  if (idx_ == final_idx_) {
@@ -1582,27 +1555,27 @@ typename var_opt_sketch<T, S, A>::const_iterator& var_opt_sketch<T, S, A>::const
1582
1555
  return *this;
1583
1556
  }
1584
1557
 
1585
- template<typename T, typename S, typename A>
1586
- typename var_opt_sketch<T, S, A>::const_iterator& var_opt_sketch<T, S, A>::const_iterator::operator++(int) {
1558
+ template<typename T, typename A>
1559
+ typename var_opt_sketch<T, A>::const_iterator& var_opt_sketch<T, A>::const_iterator::operator++(int) {
1587
1560
  const_iterator tmp(*this);
1588
1561
  operator++();
1589
1562
  return tmp;
1590
1563
  }
1591
1564
 
1592
- template<typename T, typename S, typename A>
1593
- bool var_opt_sketch<T, S, A>::const_iterator::operator==(const const_iterator& other) const {
1565
+ template<typename T, typename A>
1566
+ bool var_opt_sketch<T, A>::const_iterator::operator==(const const_iterator& other) const {
1594
1567
  if (sk_ != other.sk_) return false;
1595
1568
  if (sk_ == nullptr) return true; // end (and we know other.sk_ is also null)
1596
1569
  return idx_ == other.idx_;
1597
1570
  }
1598
1571
 
1599
- template<typename T, typename S, typename A>
1600
- bool var_opt_sketch<T, S, A>::const_iterator::operator!=(const const_iterator& other) const {
1572
+ template<typename T, typename A>
1573
+ bool var_opt_sketch<T, A>::const_iterator::operator!=(const const_iterator& other) const {
1601
1574
  return !operator==(other);
1602
1575
  }
1603
1576
 
1604
- template<typename T, typename S, typename A>
1605
- const std::pair<const T&, const double> var_opt_sketch<T, S, A>::const_iterator::operator*() const {
1577
+ template<typename T, typename A>
1578
+ const std::pair<const T&, const double> var_opt_sketch<T, A>::const_iterator::operator*() const {
1606
1579
  double wt;
1607
1580
  if (idx_ < sk_->h_) {
1608
1581
  wt = sk_->weights_[idx_];
@@ -1612,16 +1585,16 @@ const std::pair<const T&, const double> var_opt_sketch<T, S, A>::const_iterator:
1612
1585
  return std::pair<const T&, const double>(sk_->data_[idx_], wt);
1613
1586
  }
1614
1587
 
1615
- template<typename T, typename S, typename A>
1616
- bool var_opt_sketch<T, S, A>::const_iterator::get_mark() const {
1588
+ template<typename T, typename A>
1589
+ bool var_opt_sketch<T, A>::const_iterator::get_mark() const {
1617
1590
  return sk_->marks_ == nullptr ? false : sk_->marks_[idx_];
1618
1591
  }
1619
1592
 
1620
1593
 
1621
1594
  // -------- var_opt_sketch::iterator implementation ---------
1622
1595
 
1623
- template<typename T, typename S, typename A>
1624
- var_opt_sketch<T,S,A>::iterator::iterator(const var_opt_sketch<T,S,A>& sk, bool is_end, bool use_r_region) :
1596
+ template<typename T, typename A>
1597
+ var_opt_sketch<T, A>::iterator::iterator(const var_opt_sketch& sk, bool is_end, bool use_r_region) :
1625
1598
  sk_(&sk),
1626
1599
  cum_r_weight_(0.0),
1627
1600
  r_item_wt_(sk.get_tau()),
@@ -1638,8 +1611,8 @@ var_opt_sketch<T,S,A>::iterator::iterator(const var_opt_sketch<T,S,A>& sk, bool
1638
1611
  if (idx_ == final_idx_) { sk_ = nullptr; }
1639
1612
  }
1640
1613
 
1641
- template<typename T, typename S, typename A>
1642
- var_opt_sketch<T, S, A>::iterator::iterator(const iterator& other) :
1614
+ template<typename T, typename A>
1615
+ var_opt_sketch<T, A>::iterator::iterator(const iterator& other) :
1643
1616
  sk_(other.sk_),
1644
1617
  cum_r_weight_(other.cum_r_weight_),
1645
1618
  r_item_wt_(other.r_item_wt_),
@@ -1647,8 +1620,8 @@ var_opt_sketch<T, S, A>::iterator::iterator(const iterator& other) :
1647
1620
  final_idx_(other.final_idx_)
1648
1621
  {}
1649
1622
 
1650
- template<typename T, typename S, typename A>
1651
- typename var_opt_sketch<T, S, A>::iterator& var_opt_sketch<T, S, A>::iterator::operator++() {
1623
+ template<typename T, typename A>
1624
+ typename var_opt_sketch<T, A>::iterator& var_opt_sketch<T, A>::iterator::operator++() {
1652
1625
  ++idx_;
1653
1626
 
1654
1627
  if (idx_ == final_idx_) {
@@ -1661,27 +1634,27 @@ typename var_opt_sketch<T, S, A>::iterator& var_opt_sketch<T, S, A>::iterator::o
1661
1634
  return *this;
1662
1635
  }
1663
1636
 
1664
- template<typename T, typename S, typename A>
1665
- typename var_opt_sketch<T, S, A>::iterator& var_opt_sketch<T, S, A>::iterator::operator++(int) {
1637
+ template<typename T, typename A>
1638
+ typename var_opt_sketch<T, A>::iterator& var_opt_sketch<T, A>::iterator::operator++(int) {
1666
1639
  const_iterator tmp(*this);
1667
1640
  operator++();
1668
1641
  return tmp;
1669
1642
  }
1670
1643
 
1671
- template<typename T, typename S, typename A>
1672
- bool var_opt_sketch<T, S, A>::iterator::operator==(const iterator& other) const {
1644
+ template<typename T, typename A>
1645
+ bool var_opt_sketch<T, A>::iterator::operator==(const iterator& other) const {
1673
1646
  if (sk_ != other.sk_) return false;
1674
1647
  if (sk_ == nullptr) return true; // end (and we know other.sk_ is also null)
1675
1648
  return idx_ == other.idx_;
1676
1649
  }
1677
1650
 
1678
- template<typename T, typename S, typename A>
1679
- bool var_opt_sketch<T, S, A>::iterator::operator!=(const iterator& other) const {
1651
+ template<typename T, typename A>
1652
+ bool var_opt_sketch<T, A>::iterator::operator!=(const iterator& other) const {
1680
1653
  return !operator==(other);
1681
1654
  }
1682
1655
 
1683
- template<typename T, typename S, typename A>
1684
- std::pair<T&, double> var_opt_sketch<T, S, A>::iterator::operator*() {
1656
+ template<typename T, typename A>
1657
+ std::pair<T&, double> var_opt_sketch<T, A>::iterator::operator*() {
1685
1658
  double wt;
1686
1659
  if (idx_ < sk_->h_) {
1687
1660
  wt = sk_->weights_[idx_];
@@ -1693,8 +1666,8 @@ std::pair<T&, double> var_opt_sketch<T, S, A>::iterator::operator*() {
1693
1666
  return std::pair<T&, double>(sk_->data_[idx_], wt);
1694
1667
  }
1695
1668
 
1696
- template<typename T, typename S, typename A>
1697
- bool var_opt_sketch<T, S, A>::iterator::get_mark() const {
1669
+ template<typename T, typename A>
1670
+ bool var_opt_sketch<T, A>::iterator::get_mark() const {
1698
1671
  return sk_->marks_ == nullptr ? false : sk_->marks_[idx_];
1699
1672
  }
1700
1673
 
@@ -1702,40 +1675,40 @@ bool var_opt_sketch<T, S, A>::iterator::get_mark() const {
1702
1675
  * Checks if target sampling allocation is more than 50% of max sampling size.
1703
1676
  * If so, returns max sampling size, otherwise passes through target size.
1704
1677
  */
1705
- template<typename T, typename S, typename A>
1706
- uint32_t var_opt_sketch<T,S,A>::get_adjusted_size(uint32_t max_size, uint32_t resize_target) {
1707
- if (max_size - (resize_target << 1) < 0L) {
1678
+ template<typename T, typename A>
1679
+ uint32_t var_opt_sketch<T, A>::get_adjusted_size(uint32_t max_size, uint32_t resize_target) {
1680
+ if (max_size < (resize_target << 1)) {
1708
1681
  return max_size;
1709
1682
  }
1710
1683
  return resize_target;
1711
1684
  }
1712
1685
 
1713
- template<typename T, typename S, typename A>
1714
- uint32_t var_opt_sketch<T,S,A>::starting_sub_multiple(uint32_t lg_target, uint32_t lg_rf, uint32_t lg_min) {
1686
+ template<typename T, typename A>
1687
+ uint32_t var_opt_sketch<T, A>::starting_sub_multiple(uint32_t lg_target, uint32_t lg_rf, uint32_t lg_min) {
1715
1688
  return (lg_target <= lg_min)
1716
1689
  ? lg_min : (lg_rf == 0) ? lg_target
1717
1690
  : (lg_target - lg_min) % lg_rf + lg_min;
1718
1691
  }
1719
1692
 
1720
- template<typename T, typename S, typename A>
1721
- double var_opt_sketch<T,S,A>::pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate) {
1693
+ template<typename T, typename A>
1694
+ double var_opt_sketch<T, A>::pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate) {
1722
1695
  const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
1723
1696
  return bounds_binomial_proportions::approximate_upper_bound_on_p(n, k, adjusted_kappa);
1724
1697
  }
1725
1698
 
1726
- template<typename T, typename S, typename A>
1727
- double var_opt_sketch<T,S,A>::pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate) {
1699
+ template<typename T, typename A>
1700
+ double var_opt_sketch<T, A>::pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate) {
1728
1701
  const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
1729
1702
  return bounds_binomial_proportions::approximate_lower_bound_on_p(n, k, adjusted_kappa);
1730
1703
  }
1731
1704
 
1732
- template<typename T, typename S, typename A>
1733
- bool var_opt_sketch<T,S,A>::is_power_of_2(uint32_t v) {
1705
+ template<typename T, typename A>
1706
+ bool var_opt_sketch<T, A>::is_power_of_2(uint32_t v) {
1734
1707
  return v && !(v & (v - 1));
1735
1708
  }
1736
1709
 
1737
- template<typename T, typename S, typename A>
1738
- uint32_t var_opt_sketch<T,S,A>::to_log_2(uint32_t v) {
1710
+ template<typename T, typename A>
1711
+ uint32_t var_opt_sketch<T, A>::to_log_2(uint32_t v) {
1739
1712
  if (is_power_of_2(v)) {
1740
1713
  return count_trailing_zeros_in_u32(v);
1741
1714
  } else {
@@ -1744,14 +1717,14 @@ uint32_t var_opt_sketch<T,S,A>::to_log_2(uint32_t v) {
1744
1717
  }
1745
1718
 
1746
1719
  // Returns an integer in the range [0, max_value) -- excludes max_value
1747
- template<typename T, typename S, typename A>
1748
- uint32_t var_opt_sketch<T,S,A>::next_int(uint32_t max_value) {
1720
+ template<typename T, typename A>
1721
+ uint32_t var_opt_sketch<T, A>::next_int(uint32_t max_value) {
1749
1722
  std::uniform_int_distribution<uint32_t> dist(0, max_value - 1);
1750
1723
  return dist(random_utils::rand);
1751
1724
  }
1752
1725
 
1753
- template<typename T, typename S, typename A>
1754
- double var_opt_sketch<T,S,A>::next_double_exclude_zero() {
1726
+ template<typename T, typename A>
1727
+ double var_opt_sketch<T, A>::next_double_exclude_zero() {
1755
1728
  double r = random_utils::next_double(random_utils::rand);
1756
1729
  while (r == 0.0) {
1757
1730
  r = random_utils::next_double(random_utils::rand);