datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -42,12 +42,12 @@ namespace datasketches {
42
42
  * author Kevin Lang
43
43
  * author Jon Malkin
44
44
  */
45
- template<typename T, typename S, typename A>
46
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, const A& allocator) :
47
- var_opt_sketch<T,S,A>(k, rf, false, allocator) {}
45
+ template<typename T, typename A>
46
+ var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, resize_factor rf, const A& allocator) :
47
+ var_opt_sketch(k, rf, false, allocator) {}
48
48
 
49
- template<typename T, typename S, typename A>
50
- var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
49
+ template<typename T, typename A>
50
+ var_opt_sketch<T, A>::var_opt_sketch(const var_opt_sketch& other) :
51
51
  k_(other.k_),
52
52
  h_(other.h_),
53
53
  m_(other.m_),
@@ -83,8 +83,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
83
83
  }
84
84
  }
85
85
 
86
- template<typename T, typename S, typename A>
87
- var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n) :
86
+ template<typename T, typename A>
87
+ var_opt_sketch<T, A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n) :
88
88
  k_(other.k_),
89
89
  h_(other.h_),
90
90
  m_(other.m_),
@@ -120,27 +120,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
120
120
  }
121
121
  }
122
122
 
123
- template<typename T, typename S, typename A>
124
- var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
125
- uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator) :
126
- k_(k),
127
- h_(h_count),
128
- m_(0),
129
- r_(r_count),
130
- n_(n),
131
- total_wt_r_(total_wt_r),
132
- rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
133
- curr_items_alloc_(len),
134
- filled_data_(n > k),
135
- allocator_(allocator),
136
- data_(data),
137
- weights_(weights),
138
- num_marks_in_h_(0),
139
- marks_(nullptr)
140
- {}
141
-
142
- template<typename T, typename S, typename A>
143
- var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
123
+ template<typename T, typename A>
124
+ var_opt_sketch<T, A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
144
125
  k_(other.k_),
145
126
  h_(other.h_),
146
127
  m_(other.m_),
@@ -161,8 +142,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
161
142
  other.marks_ = nullptr;
162
143
  }
163
144
 
164
- template<typename T, typename S, typename A>
165
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator) :
145
+ template<typename T, typename A>
146
+ var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator) :
166
147
  k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf), allocator_(allocator) {
167
148
  if (k == 0 || k_ > MAX_K) {
168
149
  throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
@@ -179,8 +160,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadg
179
160
  num_marks_in_h_ = 0;
180
161
  }
181
162
 
182
- template<typename T, typename S, typename A>
183
- var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
163
+ template<typename T, typename A>
164
+ var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
184
165
  uint32_t curr_items_alloc, bool filled_data, std::unique_ptr<T, items_deleter> items,
185
166
  std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
186
167
  std::unique_ptr<bool, marks_deleter> marks, const A& allocator) :
@@ -201,8 +182,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32
201
182
  {}
202
183
 
203
184
 
204
- template<typename T, typename S, typename A>
205
- var_opt_sketch<T,S,A>::~var_opt_sketch() {
185
+ template<typename T, typename A>
186
+ var_opt_sketch<T, A>::~var_opt_sketch() {
206
187
  if (data_ != nullptr) {
207
188
  if (filled_data_) {
208
189
  // destroy everything
@@ -232,9 +213,9 @@ var_opt_sketch<T,S,A>::~var_opt_sketch() {
232
213
  }
233
214
  }
234
215
 
235
- template<typename T, typename S, typename A>
236
- var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(const var_opt_sketch& other) {
237
- var_opt_sketch<T,S,A> sk_copy(other);
216
+ template<typename T, typename A>
217
+ var_opt_sketch<T, A>& var_opt_sketch<T, A>::operator=(const var_opt_sketch& other) {
218
+ var_opt_sketch sk_copy(other);
238
219
  std::swap(k_, sk_copy.k_);
239
220
  std::swap(h_, sk_copy.h_);
240
221
  std::swap(m_, sk_copy.m_);
@@ -252,8 +233,8 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(const var_opt_sketch& ot
252
233
  return *this;
253
234
  }
254
235
 
255
- template<typename T, typename S, typename A>
256
- var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other) {
236
+ template<typename T, typename A>
237
+ var_opt_sketch<T, A>& var_opt_sketch<T, A>::operator=(var_opt_sketch&& other) {
257
238
  std::swap(k_, other.k_);
258
239
  std::swap(h_, other.h_);
259
240
  std::swap(m_, other.m_);
@@ -311,9 +292,9 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other)
311
292
  */
312
293
 
313
294
  // implementation for fixed-size arithmetic types (integral and floating point)
314
- template<typename T, typename S, typename A>
295
+ template<typename T, typename A>
315
296
  template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
316
- size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe&) const {
297
+ size_t var_opt_sketch<T, A>::get_serialized_size_bytes(const SerDe&) const {
317
298
  if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
318
299
  size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
319
300
  num_bytes += h_ * sizeof(double); // weights
@@ -325,9 +306,9 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe&) const {
325
306
  }
326
307
 
327
308
  // implementation for all other types
328
- template<typename T, typename S, typename A>
309
+ template<typename T, typename A>
329
310
  template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
330
- size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
311
+ size_t var_opt_sketch<T, A>::get_serialized_size_bytes(const SerDe& sd) const {
331
312
  if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
332
313
  size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
333
314
  num_bytes += h_ * sizeof(double); // weights
@@ -340,9 +321,9 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
340
321
  return num_bytes;
341
322
  }
342
323
 
343
- template<typename T, typename S, typename A>
324
+ template<typename T, typename A>
344
325
  template<typename SerDe>
345
- std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
326
+ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
346
327
  const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
347
328
  std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
348
329
  uint8_t* ptr = bytes.data() + header_size_bytes;
@@ -414,9 +395,9 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
414
395
  return bytes;
415
396
  }
416
397
 
417
- template<typename T, typename S, typename A>
398
+ template<typename T, typename A>
418
399
  template<typename SerDe>
419
- void var_opt_sketch<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
400
+ void var_opt_sketch<T, A>::serialize(std::ostream& os, const SerDe& sd) const {
420
401
  const bool empty = (h_ == 0) && (r_ == 0);
421
402
 
422
403
  const uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
@@ -477,14 +458,9 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
477
458
  }
478
459
  }
479
460
 
480
- template<typename T, typename S, typename A>
481
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
482
- return deserialize(bytes, size, S(), allocator);
483
- }
484
-
485
- template<typename T, typename S, typename A>
461
+ template<typename T, typename A>
486
462
  template<typename SerDe>
487
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
463
+ var_opt_sketch<T, A> var_opt_sketch<T, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
488
464
  ensure_minimum_memory(size, 8);
489
465
  const char* ptr = static_cast<const char*>(bytes);
490
466
  const char* base = ptr;
@@ -510,7 +486,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
510
486
  const bool is_gadget = flags & GADGET_FLAG_MASK;
511
487
 
512
488
  if (is_empty) {
513
- return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
489
+ return var_opt_sketch(k, rf, is_gadget, allocator);
514
490
  }
515
491
 
516
492
  // second and third prelongs
@@ -578,14 +554,9 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
578
554
  std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
579
555
  }
580
556
 
581
- template<typename T, typename S, typename A>
582
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
583
- return deserialize(is, S(), allocator);
584
- }
585
-
586
- template<typename T, typename S, typename A>
557
+ template<typename T, typename A>
587
558
  template<typename SerDe>
588
- var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
559
+ var_opt_sketch<T, A> var_opt_sketch<T, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
589
560
  const auto first_byte = read<uint8_t>(is);
590
561
  uint8_t preamble_longs = first_byte & 0x3f;
591
562
  const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
@@ -604,7 +575,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
604
575
  if (!is.good())
605
576
  throw std::runtime_error("error reading from std::istream");
606
577
  else
607
- return var_opt_sketch<T,S,A>(k, rf, is_gadget, allocator);
578
+ return var_opt_sketch(k, rf, is_gadget, allocator);
608
579
  }
609
580
 
610
581
  // second and third prelongs
@@ -668,13 +639,13 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
668
639
  std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
669
640
  }
670
641
 
671
- template<typename T, typename S, typename A>
672
- bool var_opt_sketch<T,S,A>::is_empty() const {
642
+ template<typename T, typename A>
643
+ bool var_opt_sketch<T, A>::is_empty() const {
673
644
  return (h_ == 0 && r_ == 0);
674
645
  }
675
646
 
676
- template<typename T, typename S, typename A>
677
- void var_opt_sketch<T,S,A>::reset() {
647
+ template<typename T, typename A>
648
+ void var_opt_sketch<T, A>::reset() {
678
649
  const uint32_t prev_alloc = curr_items_alloc_;
679
650
  const uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
680
651
  const uint32_t initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, MIN_LG_ARR_ITEMS);
@@ -718,34 +689,34 @@ void var_opt_sketch<T,S,A>::reset() {
718
689
  filled_data_ = false;
719
690
  }
720
691
 
721
- template<typename T, typename S, typename A>
722
- uint64_t var_opt_sketch<T,S,A>::get_n() const {
692
+ template<typename T, typename A>
693
+ uint64_t var_opt_sketch<T, A>::get_n() const {
723
694
  return n_;
724
695
  }
725
696
 
726
- template<typename T, typename S, typename A>
727
- uint32_t var_opt_sketch<T,S,A>::get_k() const {
697
+ template<typename T, typename A>
698
+ uint32_t var_opt_sketch<T, A>::get_k() const {
728
699
  return k_;
729
700
  }
730
701
 
731
- template<typename T, typename S, typename A>
732
- uint32_t var_opt_sketch<T,S,A>::get_num_samples() const {
702
+ template<typename T, typename A>
703
+ uint32_t var_opt_sketch<T, A>::get_num_samples() const {
733
704
  const uint32_t num_in_sketch = h_ + r_;
734
705
  return (num_in_sketch < k_ ? num_in_sketch : k_);
735
706
  }
736
707
 
737
- template<typename T, typename S, typename A>
738
- void var_opt_sketch<T,S,A>::update(const T& item, double weight) {
708
+ template<typename T, typename A>
709
+ void var_opt_sketch<T, A>::update(const T& item, double weight) {
739
710
  update(item, weight, false);
740
711
  }
741
712
 
742
- template<typename T, typename S, typename A>
743
- void var_opt_sketch<T,S,A>::update(T&& item, double weight) {
713
+ template<typename T, typename A>
714
+ void var_opt_sketch<T, A>::update(T&& item, double weight) {
744
715
  update(std::move(item), weight, false);
745
716
  }
746
717
 
747
- template<typename T, typename S, typename A>
748
- string<A> var_opt_sketch<T,S,A>::to_string() const {
718
+ template<typename T, typename A>
719
+ string<A> var_opt_sketch<T, A>::to_string() const {
749
720
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
750
721
  // The stream does not support passing an allocator instance, and alternatives are complicated.
751
722
  std::ostringstream os;
@@ -760,8 +731,8 @@ string<A> var_opt_sketch<T,S,A>::to_string() const {
760
731
  return string<A>(os.str().c_str(), allocator_);
761
732
  }
762
733
 
763
- template<typename T, typename S, typename A>
764
- string<A> var_opt_sketch<T,S,A>::items_to_string() const {
734
+ template<typename T, typename A>
735
+ string<A> var_opt_sketch<T, A>::items_to_string() const {
765
736
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
766
737
  // The stream does not support passing an allocator instance, and alternatives are complicated.
767
738
  std::ostringstream os;
@@ -774,8 +745,8 @@ string<A> var_opt_sketch<T,S,A>::items_to_string() const {
774
745
  return string<A>(os.str().c_str(), allocator_);
775
746
  }
776
747
 
777
- template<typename T, typename S, typename A>
778
- string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
748
+ template<typename T, typename A>
749
+ string<A> var_opt_sketch<T, A>::items_to_string(bool print_gap) const {
779
750
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
780
751
  // The stream does not support passing an allocator instance, and alternatives are complicated.
781
752
  std::ostringstream os;
@@ -798,9 +769,9 @@ string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
798
769
  return string<A>(os.str().c_str(), allocator_);
799
770
  }
800
771
 
801
- template<typename T, typename S, typename A>
772
+ template<typename T, typename A>
802
773
  template<typename O>
803
- void var_opt_sketch<T,S,A>::update(O&& item, double weight, bool mark) {
774
+ void var_opt_sketch<T, A>::update(O&& item, double weight, bool mark) {
804
775
  if (weight < 0.0 || std::isnan(weight) || std::isinf(weight)) {
805
776
  throw std::invalid_argument("Item weights must be nonnegative and finite. Found: "
806
777
  + std::to_string(weight));
@@ -838,9 +809,9 @@ void var_opt_sketch<T,S,A>::update(O&& item, double weight, bool mark) {
838
809
  }
839
810
  }
840
811
 
841
- template<typename T, typename S, typename A>
812
+ template<typename T, typename A>
842
813
  template<typename O>
843
- void var_opt_sketch<T,S,A>::update_warmup_phase(O&& item, double weight, bool mark) {
814
+ void var_opt_sketch<T, A>::update_warmup_phase(O&& item, double weight, bool mark) {
844
815
  // seems overly cautious
845
816
  if (r_ > 0 || m_ != 0 || h_ > k_) throw std::logic_error("invalid sketch state during warmup");
846
817
 
@@ -868,14 +839,15 @@ void var_opt_sketch<T,S,A>::update_warmup_phase(O&& item, double weight, bool ma
868
839
  would appear to the right of the R items in a hypothetical reverse-sorted
869
840
  list. It is easy to prove that it is light enough to be part of this
870
841
  round's downsampling */
871
- template<typename T, typename S, typename A>
842
+ template<typename T, typename A>
872
843
  template<typename O>
873
- void var_opt_sketch<T,S,A>::update_light(O&& item, double weight, bool mark) {
844
+ void var_opt_sketch<T, A>::update_light(O&& item, double weight, bool mark) {
874
845
  if (r_ == 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during light warmup");
875
846
 
876
847
  const uint32_t m_slot = h_; // index of the gap, which becomes the M region
877
848
  if (filled_data_) {
878
- data_[m_slot] = std::forward<O>(item);
849
+ if (&data_[m_slot] != &item)
850
+ data_[m_slot] = std::forward<O>(item);
879
851
  } else {
880
852
  new (&data_[m_slot]) T(std::forward<O>(item));
881
853
  filled_data_ = true;
@@ -895,9 +867,9 @@ void var_opt_sketch<T,S,A>::update_light(O&& item, double weight, bool mark) {
895
867
  In other words, it might go into the heap and then come right back out,
896
868
  but that should be okay because pseudo_heavy items cannot predominate
897
869
  in long streams unless (max wt) / (min wt) > o(exp(N)) */
898
- template<typename T, typename S, typename A>
870
+ template<typename T, typename A>
899
871
  template<typename O>
900
- void var_opt_sketch<T,S,A>::update_heavy_general(O&& item, double weight, bool mark) {
872
+ void var_opt_sketch<T, A>::update_heavy_general(O&& item, double weight, bool mark) {
901
873
  if (r_ < 2 || m_ != 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during heavy general update");
902
874
 
903
875
  // put into H, although may come back out momentarily
@@ -909,9 +881,9 @@ void var_opt_sketch<T,S,A>::update_heavy_general(O&& item, double weight, bool m
909
881
  /* The analysis of this case is similar to that of the general heavy case.
910
882
  The one small technical difference is that since R < 2, we must grab an M item
911
883
  to have a valid starting point for continue_by_growing_candidate_set () */
912
- template<typename T, typename S, typename A>
884
+ template<typename T, typename A>
913
885
  template<typename O>
914
- void var_opt_sketch<T,S,A>::update_heavy_r_eq1(O&& item, double weight, bool mark) {
886
+ void var_opt_sketch<T, A>::update_heavy_r_eq1(O&& item, double weight, bool mark) {
915
887
  if (r_ != 1 || m_ != 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during heavy r=1 update");
916
888
 
917
889
  push(std::forward<O>(item), weight, mark); // new item into H
@@ -929,8 +901,8 @@ void var_opt_sketch<T,S,A>::update_heavy_r_eq1(O&& item, double weight, bool mar
929
901
  * <p>Subject to certain pre-conditions, decreasing k causes tau to increase. This fact is used by
930
902
  * the unioning algorithm to force "marked" items out of H and into the reservoir region.</p>
931
903
  */
932
- template<typename T, typename S, typename A>
933
- void var_opt_sketch<T,S,A>::decrease_k_by_1() {
904
+ template<typename T, typename A>
905
+ void var_opt_sketch<T, A>::decrease_k_by_1() {
934
906
  if (k_ <= 1) {
935
907
  throw std::logic_error("Cannot decrease k below 1 in union");
936
908
  }
@@ -952,9 +924,10 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
952
924
  // first, slide the R zone to the left by 1, temporarily filling the gap
953
925
  const uint32_t old_gap_idx = h_;
954
926
  const uint32_t old_final_r_idx = (h_ + 1 + r_) - 1;
955
- //if (old_final_r_idx != k_) throw std::logic_error("gadget in invalid state");
927
+ if (old_final_r_idx != k_) throw std::logic_error("gadget in invalid state");
956
928
 
957
929
  swap_values(old_final_r_idx, old_gap_idx);
930
+ filled_data_ = true; // we just filled the gap, and no need to check previous state
958
931
 
959
932
  // now we pull an item out of H; any item is ok, but if we grab the rightmost and then
960
933
  // reduce h_, the heap invariant will be preserved (and the gap will be restored), plus
@@ -987,8 +960,8 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
987
960
  }
988
961
  }
989
962
 
990
- template<typename T, typename S, typename A>
991
- void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_marks) {
963
+ template<typename T, typename A>
964
+ void var_opt_sketch<T, A>::allocate_data_arrays(uint32_t tgt_size, bool use_marks) {
992
965
  filled_data_ = false;
993
966
 
994
967
  data_ = allocator_.allocate(tgt_size);
@@ -1001,8 +974,8 @@ void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_mar
1001
974
  }
1002
975
  }
1003
976
 
1004
- template<typename T, typename S, typename A>
1005
- void var_opt_sketch<T,S,A>::grow_data_arrays() {
977
+ template<typename T, typename A>
978
+ void var_opt_sketch<T, A>::grow_data_arrays() {
1006
979
  const uint32_t prev_size = curr_items_alloc_;
1007
980
  curr_items_alloc_ = get_adjusted_size(k_, curr_items_alloc_ << rf_);
1008
981
  if (curr_items_alloc_ == k_) {
@@ -1038,8 +1011,8 @@ void var_opt_sketch<T,S,A>::grow_data_arrays() {
1038
1011
  }
1039
1012
  }
1040
1013
 
1041
- template<typename T, typename S, typename A>
1042
- void var_opt_sketch<T,S,A>::transition_from_warmup() {
1014
+ template<typename T, typename A>
1015
+ void var_opt_sketch<T, A>::transition_from_warmup() {
1043
1016
  // Move the 2 lightest items from H to M
1044
1017
  // But the lighter really belongs in R, so update counts to reflect that
1045
1018
  convert_to_heap();
@@ -1061,8 +1034,8 @@ void var_opt_sketch<T,S,A>::transition_from_warmup() {
1061
1034
  grow_candidate_set(weights_[k_ - 1] + total_wt_r_, 2);
1062
1035
  }
1063
1036
 
1064
- template<typename T, typename S, typename A>
1065
- void var_opt_sketch<T,S,A>::convert_to_heap() {
1037
+ template<typename T, typename A>
1038
+ void var_opt_sketch<T, A>::convert_to_heap() {
1066
1039
  if (h_ < 2) {
1067
1040
  return; // nothing to do
1068
1041
  }
@@ -1081,8 +1054,8 @@ void var_opt_sketch<T,S,A>::convert_to_heap() {
1081
1054
  //}
1082
1055
  }
1083
1056
 
1084
- template<typename T, typename S, typename A>
1085
- void var_opt_sketch<T,S,A>::restore_towards_leaves(uint32_t slot_in) {
1057
+ template<typename T, typename A>
1058
+ void var_opt_sketch<T, A>::restore_towards_leaves(uint32_t slot_in) {
1086
1059
  const uint32_t last_slot = h_ - 1;
1087
1060
  if (h_ == 0 || slot_in > last_slot) throw std::logic_error("invalid heap state");
1088
1061
 
@@ -1109,8 +1082,8 @@ void var_opt_sketch<T,S,A>::restore_towards_leaves(uint32_t slot_in) {
1109
1082
  }
1110
1083
  }
1111
1084
 
1112
- template<typename T, typename S, typename A>
1113
- void var_opt_sketch<T,S,A>::restore_towards_root(uint32_t slot_in) {
1085
+ template<typename T, typename A>
1086
+ void var_opt_sketch<T, A>::restore_towards_root(uint32_t slot_in) {
1114
1087
  uint32_t slot = slot_in;
1115
1088
  uint32_t p = (((slot + 1) / 2) - 1); // valid if slot >= 1
1116
1089
  while ((slot > 0) && (weights_[slot] < weights_[p])) {
@@ -1120,11 +1093,12 @@ void var_opt_sketch<T,S,A>::restore_towards_root(uint32_t slot_in) {
1120
1093
  }
1121
1094
  }
1122
1095
 
1123
- template<typename T, typename S, typename A>
1096
+ template<typename T, typename A>
1124
1097
  template<typename O>
1125
- void var_opt_sketch<T,S,A>::push(O&& item, double wt, bool mark) {
1098
+ void var_opt_sketch<T, A>::push(O&& item, double wt, bool mark) {
1126
1099
  if (filled_data_) {
1127
- data_[h_] = std::forward<O>(item);
1100
+ if (&data_[h_] != &item)
1101
+ data_[h_] = std::forward<O>(item);
1128
1102
  } else {
1129
1103
  new (&data_[h_]) T(std::forward<O>(item));
1130
1104
  filled_data_ = true;
@@ -1139,8 +1113,8 @@ void var_opt_sketch<T,S,A>::push(O&& item, double wt, bool mark) {
1139
1113
  restore_towards_root(h_ - 1); // need use old h_, but want accurate h_
1140
1114
  }
1141
1115
 
1142
- template<typename T, typename S, typename A>
1143
- void var_opt_sketch<T,S,A>::pop_min_to_m_region() {
1116
+ template<typename T, typename A>
1117
+ void var_opt_sketch<T, A>::pop_min_to_m_region() {
1144
1118
  if (h_ == 0 || (h_ + m_ + r_ != k_ + 1))
1145
1119
  throw std::logic_error("invalid heap state popping min to M region");
1146
1120
 
@@ -1164,8 +1138,8 @@ void var_opt_sketch<T,S,A>::pop_min_to_m_region() {
1164
1138
  }
1165
1139
 
1166
1140
 
1167
- template<typename T, typename S, typename A>
1168
- void var_opt_sketch<T,S,A>::swap_values(uint32_t src, uint32_t dst) {
1141
+ template<typename T, typename A>
1142
+ void var_opt_sketch<T, A>::swap_values(uint32_t src, uint32_t dst) {
1169
1143
  std::swap(data_[src], data_[dst]);
1170
1144
  std::swap(weights_[src], weights_[dst]);
1171
1145
 
@@ -1182,8 +1156,8 @@ void var_opt_sketch<T,S,A>::swap_values(uint32_t src, uint32_t dst) {
1182
1156
  of cands is at least 2. We will now grow the candidate set as much as possible
1183
1157
  by pulling sufficiently light items from h to m.
1184
1158
  */
1185
- template<typename T, typename S, typename A>
1186
- void var_opt_sketch<T,S,A>::grow_candidate_set(double wt_cands, uint32_t num_cands) {
1159
+ template<typename T, typename A>
1160
+ void var_opt_sketch<T, A>::grow_candidate_set(double wt_cands, uint32_t num_cands) {
1187
1161
  if ((h_ + m_ + r_ != k_ + 1) || (num_cands < 1) || (num_cands != m_ + r_) || (m_ >= 2))
1188
1162
  throw std::logic_error("invariant violated when growing candidate set");
1189
1163
 
@@ -1206,8 +1180,8 @@ void var_opt_sketch<T,S,A>::grow_candidate_set(double wt_cands, uint32_t num_can
1206
1180
  downsample_candidate_set(wt_cands, num_cands);
1207
1181
  }
1208
1182
 
1209
- template<typename T, typename S, typename A>
1210
- void var_opt_sketch<T,S,A>::downsample_candidate_set(double wt_cands, uint32_t num_cands) {
1183
+ template<typename T, typename A>
1184
+ void var_opt_sketch<T, A>::downsample_candidate_set(double wt_cands, uint32_t num_cands) {
1211
1185
  if (num_cands < 2 || h_ + num_cands != k_ + 1)
1212
1186
  throw std::logic_error("invalid num_cands when downsampling");
1213
1187
 
@@ -1225,17 +1199,16 @@ void var_opt_sketch<T,S,A>::downsample_candidate_set(double wt_cands, uint32_t n
1225
1199
  weights_[j] = -1.0;
1226
1200
  }
1227
1201
 
1228
- // The next two lines work even when delete_slot == leftmost_cand_slot
1202
+ // The next line works even when delete_slot == leftmost_cand_slot
1229
1203
  data_[delete_slot] = std::move(data_[leftmost_cand_slot]);
1230
- // cannot set data_[leftmost_cand_slot] to null since not uisng T*
1231
1204
 
1232
1205
  m_ = 0;
1233
1206
  r_ = num_cands - 1;
1234
1207
  total_wt_r_ = wt_cands;
1235
1208
  }
1236
1209
 
1237
- template<typename T, typename S, typename A>
1238
- uint32_t var_opt_sketch<T,S,A>::choose_delete_slot(double wt_cands, uint32_t num_cands) const {
1210
+ template<typename T, typename A>
1211
+ uint32_t var_opt_sketch<T, A>::choose_delete_slot(double wt_cands, uint32_t num_cands) const {
1239
1212
  if (r_ == 0) throw std::logic_error("choosing delete slot while in exact mode");
1240
1213
 
1241
1214
  if (m_ == 0) {
@@ -1262,8 +1235,8 @@ uint32_t var_opt_sketch<T,S,A>::choose_delete_slot(double wt_cands, uint32_t num
1262
1235
  }
1263
1236
  }
1264
1237
 
1265
- template<typename T, typename S, typename A>
1266
- uint32_t var_opt_sketch<T,S,A>::choose_weighted_delete_slot(double wt_cands, uint32_t num_cands) const {
1238
+ template<typename T, typename A>
1239
+ uint32_t var_opt_sketch<T, A>::choose_weighted_delete_slot(double wt_cands, uint32_t num_cands) const {
1267
1240
  if (m_ < 1) throw std::logic_error("must have weighted delete slot");
1268
1241
 
1269
1242
  const uint32_t offset = h_;
@@ -1286,8 +1259,8 @@ uint32_t var_opt_sketch<T,S,A>::choose_weighted_delete_slot(double wt_cands, uin
1286
1259
  return final_m + 1;
1287
1260
  }
1288
1261
 
1289
- template<typename T, typename S, typename A>
1290
- uint32_t var_opt_sketch<T,S,A>::pick_random_slot_in_r() const {
1262
+ template<typename T, typename A>
1263
+ uint32_t var_opt_sketch<T, A>::pick_random_slot_in_r() const {
1291
1264
  if (r_ == 0) throw std::logic_error("r_ = 0 when picking slot in R region");
1292
1265
  const uint32_t offset = h_ + m_;
1293
1266
  if (r_ == 1) {
@@ -1297,32 +1270,32 @@ uint32_t var_opt_sketch<T,S,A>::pick_random_slot_in_r() const {
1297
1270
  }
1298
1271
  }
1299
1272
 
1300
- template<typename T, typename S, typename A>
1301
- double var_opt_sketch<T,S,A>::peek_min() const {
1273
+ template<typename T, typename A>
1274
+ double var_opt_sketch<T, A>::peek_min() const {
1302
1275
  if (h_ == 0) throw std::logic_error("h_ = 0 when checking min in H region");
1303
1276
  return weights_[0];
1304
1277
  }
1305
1278
 
1306
- template<typename T, typename S, typename A>
1307
- inline bool var_opt_sketch<T,S,A>::is_marked(uint32_t idx) const {
1279
+ template<typename T, typename A>
1280
+ inline bool var_opt_sketch<T, A>::is_marked(uint32_t idx) const {
1308
1281
  return marks_ == nullptr ? false : marks_[idx];
1309
1282
  }
1310
1283
 
1311
- template<typename T, typename S, typename A>
1312
- double var_opt_sketch<T,S,A>::get_tau() const {
1284
+ template<typename T, typename A>
1285
+ double var_opt_sketch<T, A>::get_tau() const {
1313
1286
  return r_ == 0 ? std::nan("1") : (total_wt_r_ / r_);
1314
1287
  }
1315
1288
 
1316
- template<typename T, typename S, typename A>
1317
- void var_opt_sketch<T,S,A>::strip_marks() {
1289
+ template<typename T, typename A>
1290
+ void var_opt_sketch<T, A>::strip_marks() {
1318
1291
  if (marks_ == nullptr) throw std::logic_error("request to strip marks from non-gadget");
1319
1292
  num_marks_in_h_ = 0;
1320
1293
  AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
1321
1294
  marks_ = nullptr;
1322
1295
  }
1323
1296
 
1324
- template<typename T, typename S, typename A>
1325
- void var_opt_sketch<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
1297
+ template<typename T, typename A>
1298
+ void var_opt_sketch<T, A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
1326
1299
  const bool is_empty(flags & EMPTY_FLAG_MASK);
1327
1300
 
1328
1301
  if (is_empty) {
@@ -1342,8 +1315,8 @@ void var_opt_sketch<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t
1342
1315
  }
1343
1316
  }
1344
1317
 
1345
- template<typename T, typename S, typename A>
1346
- void var_opt_sketch<T,S,A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
1318
+ template<typename T, typename A>
1319
+ void var_opt_sketch<T, A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
1347
1320
  if (family_id == FAMILY_ID) {
1348
1321
  if (ser_ver != SER_VER) {
1349
1322
  throw std::invalid_argument("Possible corruption: VarOpt serialization version must be "
@@ -1357,8 +1330,8 @@ void var_opt_sketch<T,S,A>::check_family_and_serialization_version(uint8_t famil
1357
1330
  + std::to_string(FAMILY_ID) + ". Found: " + std::to_string(family_id));
1358
1331
  }
1359
1332
 
1360
- template<typename T, typename S, typename A>
1361
- uint32_t var_opt_sketch<T, S, A>::validate_and_get_target_size(uint32_t preamble_longs, uint32_t k, uint64_t n,
1333
+ template<typename T, typename A>
1334
+ uint32_t var_opt_sketch<T, A>::validate_and_get_target_size(uint32_t preamble_longs, uint32_t k, uint64_t n,
1362
1335
  uint32_t h, uint32_t r, resize_factor rf) {
1363
1336
  if (k == 0 || k > MAX_K) {
1364
1337
  throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
@@ -1403,9 +1376,9 @@ uint32_t var_opt_sketch<T, S, A>::validate_and_get_target_size(uint32_t preamble
1403
1376
  return array_size;
1404
1377
  }
1405
1378
 
1406
- template<typename T, typename S, typename A>
1379
+ template<typename T, typename A>
1407
1380
  template<typename P>
1408
- subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
1381
+ subset_summary var_opt_sketch<T, A>::estimate_subset_sum(P predicate) const {
1409
1382
  if (n_ == 0) {
1410
1383
  return {0.0, 0.0, 0.0, 0.0};
1411
1384
  }
@@ -1451,8 +1424,8 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
1451
1424
  };
1452
1425
  }
1453
1426
 
1454
- template<typename T, typename S, typename A>
1455
- class var_opt_sketch<T, S, A>::items_deleter {
1427
+ template<typename T, typename A>
1428
+ class var_opt_sketch<T, A>::items_deleter {
1456
1429
  public:
1457
1430
  items_deleter(uint32_t num, const A& allocator) : num(num), h_count(0), r_count(0), allocator(allocator) {}
1458
1431
  void set_h(uint32_t h) { h_count = h; }
@@ -1480,8 +1453,8 @@ class var_opt_sketch<T, S, A>::items_deleter {
1480
1453
  A allocator;
1481
1454
  };
1482
1455
 
1483
- template<typename T, typename S, typename A>
1484
- class var_opt_sketch<T, S, A>::weights_deleter {
1456
+ template<typename T, typename A>
1457
+ class var_opt_sketch<T, A>::weights_deleter {
1485
1458
  public:
1486
1459
  weights_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
1487
1460
  void operator() (double* ptr) {
@@ -1494,8 +1467,8 @@ class var_opt_sketch<T, S, A>::weights_deleter {
1494
1467
  AllocDouble allocator;
1495
1468
  };
1496
1469
 
1497
- template<typename T, typename S, typename A>
1498
- class var_opt_sketch<T, S, A>::marks_deleter {
1470
+ template<typename T, typename A>
1471
+ class var_opt_sketch<T, A>::marks_deleter {
1499
1472
  public:
1500
1473
  marks_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
1501
1474
  void operator() (bool* ptr) {
@@ -1509,20 +1482,20 @@ class var_opt_sketch<T, S, A>::marks_deleter {
1509
1482
  };
1510
1483
 
1511
1484
 
1512
- template<typename T, typename S, typename A>
1513
- typename var_opt_sketch<T, S, A>::const_iterator var_opt_sketch<T, S, A>::begin() const {
1514
- return var_opt_sketch<T, S, A>::const_iterator(*this, false);
1485
+ template<typename T, typename A>
1486
+ typename var_opt_sketch<T, A>::const_iterator var_opt_sketch<T, A>::begin() const {
1487
+ return const_iterator(*this, false);
1515
1488
  }
1516
1489
 
1517
- template<typename T, typename S, typename A>
1518
- typename var_opt_sketch<T, S, A>::const_iterator var_opt_sketch<T, S, A>::end() const {
1519
- return var_opt_sketch<T, S, A>::const_iterator(*this, true);
1490
+ template<typename T, typename A>
1491
+ typename var_opt_sketch<T, A>::const_iterator var_opt_sketch<T, A>::end() const {
1492
+ return const_iterator(*this, true);
1520
1493
  }
1521
1494
 
1522
1495
  // -------- var_opt_sketch::const_iterator implementation ---------
1523
1496
 
1524
- template<typename T, typename S, typename A>
1525
- var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A>& sk, bool is_end) :
1497
+ template<typename T, typename A>
1498
+ var_opt_sketch<T, A>::const_iterator::const_iterator(const var_opt_sketch& sk, bool is_end) :
1526
1499
  sk_(&sk),
1527
1500
  cum_r_weight_(0.0),
1528
1501
  r_item_wt_(sk.get_tau()),
@@ -1540,8 +1513,8 @@ var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A
1540
1513
  if (idx_ == final_idx_) { sk_ = nullptr; }
1541
1514
  }
1542
1515
 
1543
- template<typename T, typename S, typename A>
1544
- var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A>& sk, bool is_end, bool use_r_region) :
1516
+ template<typename T, typename A>
1517
+ var_opt_sketch<T, A>::const_iterator::const_iterator(const var_opt_sketch& sk, bool is_end, bool use_r_region) :
1545
1518
  sk_(&sk),
1546
1519
  cum_r_weight_(0.0),
1547
1520
  r_item_wt_(sk.get_tau()),
@@ -1559,8 +1532,8 @@ var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A
1559
1532
  }
1560
1533
 
1561
1534
 
1562
- template<typename T, typename S, typename A>
1563
- var_opt_sketch<T, S, A>::const_iterator::const_iterator(const const_iterator& other) :
1535
+ template<typename T, typename A>
1536
+ var_opt_sketch<T, A>::const_iterator::const_iterator(const const_iterator& other) :
1564
1537
  sk_(other.sk_),
1565
1538
  cum_r_weight_(other.cum_r_weight_),
1566
1539
  r_item_wt_(other.r_item_wt_),
@@ -1568,8 +1541,8 @@ var_opt_sketch<T, S, A>::const_iterator::const_iterator(const const_iterator& ot
1568
1541
  final_idx_(other.final_idx_)
1569
1542
  {}
1570
1543
 
1571
- template<typename T, typename S, typename A>
1572
- typename var_opt_sketch<T, S, A>::const_iterator& var_opt_sketch<T, S, A>::const_iterator::operator++() {
1544
+ template<typename T, typename A>
1545
+ typename var_opt_sketch<T, A>::const_iterator& var_opt_sketch<T, A>::const_iterator::operator++() {
1573
1546
  ++idx_;
1574
1547
 
1575
1548
  if (idx_ == final_idx_) {
@@ -1582,27 +1555,27 @@ typename var_opt_sketch<T, S, A>::const_iterator& var_opt_sketch<T, S, A>::const
1582
1555
  return *this;
1583
1556
  }
1584
1557
 
1585
- template<typename T, typename S, typename A>
1586
- typename var_opt_sketch<T, S, A>::const_iterator& var_opt_sketch<T, S, A>::const_iterator::operator++(int) {
1558
+ template<typename T, typename A>
1559
+ typename var_opt_sketch<T, A>::const_iterator& var_opt_sketch<T, A>::const_iterator::operator++(int) {
1587
1560
  const_iterator tmp(*this);
1588
1561
  operator++();
1589
1562
  return tmp;
1590
1563
  }
1591
1564
 
1592
- template<typename T, typename S, typename A>
1593
- bool var_opt_sketch<T, S, A>::const_iterator::operator==(const const_iterator& other) const {
1565
+ template<typename T, typename A>
1566
+ bool var_opt_sketch<T, A>::const_iterator::operator==(const const_iterator& other) const {
1594
1567
  if (sk_ != other.sk_) return false;
1595
1568
  if (sk_ == nullptr) return true; // end (and we know other.sk_ is also null)
1596
1569
  return idx_ == other.idx_;
1597
1570
  }
1598
1571
 
1599
- template<typename T, typename S, typename A>
1600
- bool var_opt_sketch<T, S, A>::const_iterator::operator!=(const const_iterator& other) const {
1572
+ template<typename T, typename A>
1573
+ bool var_opt_sketch<T, A>::const_iterator::operator!=(const const_iterator& other) const {
1601
1574
  return !operator==(other);
1602
1575
  }
1603
1576
 
1604
- template<typename T, typename S, typename A>
1605
- const std::pair<const T&, const double> var_opt_sketch<T, S, A>::const_iterator::operator*() const {
1577
+ template<typename T, typename A>
1578
+ const std::pair<const T&, const double> var_opt_sketch<T, A>::const_iterator::operator*() const {
1606
1579
  double wt;
1607
1580
  if (idx_ < sk_->h_) {
1608
1581
  wt = sk_->weights_[idx_];
@@ -1612,16 +1585,16 @@ const std::pair<const T&, const double> var_opt_sketch<T, S, A>::const_iterator:
1612
1585
  return std::pair<const T&, const double>(sk_->data_[idx_], wt);
1613
1586
  }
1614
1587
 
1615
- template<typename T, typename S, typename A>
1616
- bool var_opt_sketch<T, S, A>::const_iterator::get_mark() const {
1588
+ template<typename T, typename A>
1589
+ bool var_opt_sketch<T, A>::const_iterator::get_mark() const {
1617
1590
  return sk_->marks_ == nullptr ? false : sk_->marks_[idx_];
1618
1591
  }
1619
1592
 
1620
1593
 
1621
1594
  // -------- var_opt_sketch::iterator implementation ---------
1622
1595
 
1623
- template<typename T, typename S, typename A>
1624
- var_opt_sketch<T,S,A>::iterator::iterator(const var_opt_sketch<T,S,A>& sk, bool is_end, bool use_r_region) :
1596
+ template<typename T, typename A>
1597
+ var_opt_sketch<T, A>::iterator::iterator(const var_opt_sketch& sk, bool is_end, bool use_r_region) :
1625
1598
  sk_(&sk),
1626
1599
  cum_r_weight_(0.0),
1627
1600
  r_item_wt_(sk.get_tau()),
@@ -1638,8 +1611,8 @@ var_opt_sketch<T,S,A>::iterator::iterator(const var_opt_sketch<T,S,A>& sk, bool
1638
1611
  if (idx_ == final_idx_) { sk_ = nullptr; }
1639
1612
  }
1640
1613
 
1641
- template<typename T, typename S, typename A>
1642
- var_opt_sketch<T, S, A>::iterator::iterator(const iterator& other) :
1614
+ template<typename T, typename A>
1615
+ var_opt_sketch<T, A>::iterator::iterator(const iterator& other) :
1643
1616
  sk_(other.sk_),
1644
1617
  cum_r_weight_(other.cum_r_weight_),
1645
1618
  r_item_wt_(other.r_item_wt_),
@@ -1647,8 +1620,8 @@ var_opt_sketch<T, S, A>::iterator::iterator(const iterator& other) :
1647
1620
  final_idx_(other.final_idx_)
1648
1621
  {}
1649
1622
 
1650
- template<typename T, typename S, typename A>
1651
- typename var_opt_sketch<T, S, A>::iterator& var_opt_sketch<T, S, A>::iterator::operator++() {
1623
+ template<typename T, typename A>
1624
+ typename var_opt_sketch<T, A>::iterator& var_opt_sketch<T, A>::iterator::operator++() {
1652
1625
  ++idx_;
1653
1626
 
1654
1627
  if (idx_ == final_idx_) {
@@ -1661,27 +1634,27 @@ typename var_opt_sketch<T, S, A>::iterator& var_opt_sketch<T, S, A>::iterator::o
1661
1634
  return *this;
1662
1635
  }
1663
1636
 
1664
- template<typename T, typename S, typename A>
1665
- typename var_opt_sketch<T, S, A>::iterator& var_opt_sketch<T, S, A>::iterator::operator++(int) {
1637
+ template<typename T, typename A>
1638
+ typename var_opt_sketch<T, A>::iterator& var_opt_sketch<T, A>::iterator::operator++(int) {
1666
1639
  const_iterator tmp(*this);
1667
1640
  operator++();
1668
1641
  return tmp;
1669
1642
  }
1670
1643
 
1671
- template<typename T, typename S, typename A>
1672
- bool var_opt_sketch<T, S, A>::iterator::operator==(const iterator& other) const {
1644
+ template<typename T, typename A>
1645
+ bool var_opt_sketch<T, A>::iterator::operator==(const iterator& other) const {
1673
1646
  if (sk_ != other.sk_) return false;
1674
1647
  if (sk_ == nullptr) return true; // end (and we know other.sk_ is also null)
1675
1648
  return idx_ == other.idx_;
1676
1649
  }
1677
1650
 
1678
- template<typename T, typename S, typename A>
1679
- bool var_opt_sketch<T, S, A>::iterator::operator!=(const iterator& other) const {
1651
+ template<typename T, typename A>
1652
+ bool var_opt_sketch<T, A>::iterator::operator!=(const iterator& other) const {
1680
1653
  return !operator==(other);
1681
1654
  }
1682
1655
 
1683
- template<typename T, typename S, typename A>
1684
- std::pair<T&, double> var_opt_sketch<T, S, A>::iterator::operator*() {
1656
+ template<typename T, typename A>
1657
+ std::pair<T&, double> var_opt_sketch<T, A>::iterator::operator*() {
1685
1658
  double wt;
1686
1659
  if (idx_ < sk_->h_) {
1687
1660
  wt = sk_->weights_[idx_];
@@ -1693,8 +1666,8 @@ std::pair<T&, double> var_opt_sketch<T, S, A>::iterator::operator*() {
1693
1666
  return std::pair<T&, double>(sk_->data_[idx_], wt);
1694
1667
  }
1695
1668
 
1696
- template<typename T, typename S, typename A>
1697
- bool var_opt_sketch<T, S, A>::iterator::get_mark() const {
1669
+ template<typename T, typename A>
1670
+ bool var_opt_sketch<T, A>::iterator::get_mark() const {
1698
1671
  return sk_->marks_ == nullptr ? false : sk_->marks_[idx_];
1699
1672
  }
1700
1673
 
@@ -1702,40 +1675,40 @@ bool var_opt_sketch<T, S, A>::iterator::get_mark() const {
1702
1675
  * Checks if target sampling allocation is more than 50% of max sampling size.
1703
1676
  * If so, returns max sampling size, otherwise passes through target size.
1704
1677
  */
1705
- template<typename T, typename S, typename A>
1706
- uint32_t var_opt_sketch<T,S,A>::get_adjusted_size(uint32_t max_size, uint32_t resize_target) {
1707
- if (max_size - (resize_target << 1) < 0L) {
1678
+ template<typename T, typename A>
1679
+ uint32_t var_opt_sketch<T, A>::get_adjusted_size(uint32_t max_size, uint32_t resize_target) {
1680
+ if (max_size < (resize_target << 1)) {
1708
1681
  return max_size;
1709
1682
  }
1710
1683
  return resize_target;
1711
1684
  }
1712
1685
 
1713
- template<typename T, typename S, typename A>
1714
- uint32_t var_opt_sketch<T,S,A>::starting_sub_multiple(uint32_t lg_target, uint32_t lg_rf, uint32_t lg_min) {
1686
+ template<typename T, typename A>
1687
+ uint32_t var_opt_sketch<T, A>::starting_sub_multiple(uint32_t lg_target, uint32_t lg_rf, uint32_t lg_min) {
1715
1688
  return (lg_target <= lg_min)
1716
1689
  ? lg_min : (lg_rf == 0) ? lg_target
1717
1690
  : (lg_target - lg_min) % lg_rf + lg_min;
1718
1691
  }
1719
1692
 
1720
- template<typename T, typename S, typename A>
1721
- double var_opt_sketch<T,S,A>::pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate) {
1693
+ template<typename T, typename A>
1694
+ double var_opt_sketch<T, A>::pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate) {
1722
1695
  const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
1723
1696
  return bounds_binomial_proportions::approximate_upper_bound_on_p(n, k, adjusted_kappa);
1724
1697
  }
1725
1698
 
1726
- template<typename T, typename S, typename A>
1727
- double var_opt_sketch<T,S,A>::pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate) {
1699
+ template<typename T, typename A>
1700
+ double var_opt_sketch<T, A>::pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate) {
1728
1701
  const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
1729
1702
  return bounds_binomial_proportions::approximate_lower_bound_on_p(n, k, adjusted_kappa);
1730
1703
  }
1731
1704
 
1732
- template<typename T, typename S, typename A>
1733
- bool var_opt_sketch<T,S,A>::is_power_of_2(uint32_t v) {
1705
+ template<typename T, typename A>
1706
+ bool var_opt_sketch<T, A>::is_power_of_2(uint32_t v) {
1734
1707
  return v && !(v & (v - 1));
1735
1708
  }
1736
1709
 
1737
- template<typename T, typename S, typename A>
1738
- uint32_t var_opt_sketch<T,S,A>::to_log_2(uint32_t v) {
1710
+ template<typename T, typename A>
1711
+ uint32_t var_opt_sketch<T, A>::to_log_2(uint32_t v) {
1739
1712
  if (is_power_of_2(v)) {
1740
1713
  return count_trailing_zeros_in_u32(v);
1741
1714
  } else {
@@ -1744,14 +1717,14 @@ uint32_t var_opt_sketch<T,S,A>::to_log_2(uint32_t v) {
1744
1717
  }
1745
1718
 
1746
1719
  // Returns an integer in the range [0, max_value) -- excludes max_value
1747
- template<typename T, typename S, typename A>
1748
- uint32_t var_opt_sketch<T,S,A>::next_int(uint32_t max_value) {
1720
+ template<typename T, typename A>
1721
+ uint32_t var_opt_sketch<T, A>::next_int(uint32_t max_value) {
1749
1722
  std::uniform_int_distribution<uint32_t> dist(0, max_value - 1);
1750
1723
  return dist(random_utils::rand);
1751
1724
  }
1752
1725
 
1753
- template<typename T, typename S, typename A>
1754
- double var_opt_sketch<T,S,A>::next_double_exclude_zero() {
1726
+ template<typename T, typename A>
1727
+ double var_opt_sketch<T, A>::next_double_exclude_zero() {
1755
1728
  double r = random_utils::next_double(random_utils::rand);
1756
1729
  while (r == 0.0) {
1757
1730
  r = random_utils::next_double(random_utils::rand);