datasketches 0.2.2 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +8 -8
  6. data/ext/datasketches/kll_wrapper.cpp +5 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
  16. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
  18. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  19. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  20. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  21. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  22. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  26. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
  31. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  33. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  34. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  35. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  36. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  38. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  41. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  42. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  44. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  45. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  47. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  48. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  49. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  50. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  51. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  52. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  53. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  54. data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
  55. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  56. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  57. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
  58. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
  59. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
  60. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  61. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  62. data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
  63. data/vendor/datasketches-cpp/python/README.md +57 -50
  64. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  65. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  66. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  67. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  68. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
  69. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  70. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  71. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  72. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
  73. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
  74. data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
  75. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  76. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  77. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  78. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  79. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  80. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  81. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  82. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  83. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  84. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  85. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  86. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  87. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  88. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  89. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  90. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  91. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  92. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  93. data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
  94. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  95. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  96. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  97. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
  98. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  99. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  100. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
  101. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  107. data/vendor/datasketches-cpp/setup.py +10 -7
  108. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  110. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
  114. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
  115. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  116. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
  117. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  118. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
  120. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  121. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
  122. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
  123. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  124. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  125. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  126. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  127. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  130. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  131. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
  132. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
  133. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
  134. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  135. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  136. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  137. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  138. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
  141. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  142. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  143. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  144. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  145. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  146. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  147. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
  148. metadata +34 -12
  149. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  150. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  151. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  152. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
  153. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  154. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -22,6 +22,7 @@
22
22
 
23
23
  #include <sstream>
24
24
  #include <vector>
25
+ #include <stdexcept>
25
26
 
26
27
  #include "serde.hpp"
27
28
  #include "binomial_bounds.hpp"
@@ -31,64 +32,72 @@
31
32
  namespace datasketches {
32
33
 
33
34
  template<typename A>
34
- bool theta_sketch_alloc<A>::is_estimation_mode() const {
35
+ bool base_theta_sketch_alloc<A>::is_estimation_mode() const {
35
36
  return get_theta64() < theta_constants::MAX_THETA && !is_empty();
36
37
  }
37
38
 
38
39
  template<typename A>
39
- double theta_sketch_alloc<A>::get_theta() const {
40
+ double base_theta_sketch_alloc<A>::get_theta() const {
40
41
  return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
41
42
  }
42
43
 
43
44
  template<typename A>
44
- double theta_sketch_alloc<A>::get_estimate() const {
45
+ double base_theta_sketch_alloc<A>::get_estimate() const {
45
46
  return get_num_retained() / get_theta();
46
47
  }
47
48
 
48
49
  template<typename A>
49
- double theta_sketch_alloc<A>::get_lower_bound(uint8_t num_std_devs) const {
50
+ double base_theta_sketch_alloc<A>::get_lower_bound(uint8_t num_std_devs) const {
50
51
  if (!is_estimation_mode()) return get_num_retained();
51
52
  return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
52
53
  }
53
54
 
54
55
  template<typename A>
55
- double theta_sketch_alloc<A>::get_upper_bound(uint8_t num_std_devs) const {
56
+ double base_theta_sketch_alloc<A>::get_upper_bound(uint8_t num_std_devs) const {
56
57
  if (!is_estimation_mode()) return get_num_retained();
57
58
  return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
58
59
  }
59
60
 
60
61
  template<typename A>
61
- string<A> theta_sketch_alloc<A>::to_string(bool detail) const {
62
- ostrstream os;
62
+ string<A> base_theta_sketch_alloc<A>::to_string(bool print_details) const {
63
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
64
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
65
+ std::ostringstream os;
63
66
  os << "### Theta sketch summary:" << std::endl;
64
- os << " num retained entries : " << get_num_retained() << std::endl;
65
- os << " seed hash : " << get_seed_hash() << std::endl;
66
- os << " empty? : " << (is_empty() ? "true" : "false") << std::endl;
67
- os << " ordered? : " << (is_ordered() ? "true" : "false") << std::endl;
68
- os << " estimation mode? : " << (is_estimation_mode() ? "true" : "false") << std::endl;
69
- os << " theta (fraction) : " << get_theta() << std::endl;
70
- os << " theta (raw 64-bit) : " << get_theta64() << std::endl;
67
+ os << " num retained entries : " << this->get_num_retained() << std::endl;
68
+ os << " seed hash : " << this->get_seed_hash() << std::endl;
69
+ os << " empty? : " << (this->is_empty() ? "true" : "false") << std::endl;
70
+ os << " ordered? : " << (this->is_ordered() ? "true" : "false") << std::endl;
71
+ os << " estimation mode? : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
72
+ os << " theta (fraction) : " << this->get_theta() << std::endl;
73
+ os << " theta (raw 64-bit) : " << this->get_theta64() << std::endl;
71
74
  os << " estimate : " << this->get_estimate() << std::endl;
72
75
  os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
73
76
  os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
74
77
  print_specifics(os);
75
78
  os << "### End sketch summary" << std::endl;
76
- if (detail) {
79
+ if (print_details) {
80
+ print_items(os);
81
+ }
82
+ return string<A>(os.str().c_str(), this->get_allocator());
83
+ }
84
+
85
+ template<typename A>
86
+ void theta_sketch_alloc<A>::print_items(std::ostringstream& os) const {
77
87
  os << "### Retained entries" << std::endl;
78
88
  for (const auto& hash: *this) {
79
89
  os << hash << std::endl;
80
90
  }
81
91
  os << "### End retained entries" << std::endl;
82
- }
83
- return os.str();
84
92
  }
85
93
 
94
+
86
95
  // update sketch
87
96
 
88
97
  template<typename A>
89
98
  update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
90
- uint64_t theta, uint64_t seed, const A& allocator):
91
- table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
99
+ float p, uint64_t theta, uint64_t seed, const A& allocator):
100
+ table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
92
101
  {}
93
102
 
94
103
  template<typename A>
@@ -103,12 +112,12 @@ bool update_theta_sketch_alloc<A>::is_empty() const {
103
112
 
104
113
  template<typename A>
105
114
  bool update_theta_sketch_alloc<A>::is_ordered() const {
106
- return false;
115
+ return table_.num_entries_ > 1 ? false : true;
107
116
  }
108
117
 
109
118
  template<typename A>
110
119
  uint64_t update_theta_sketch_alloc<A>::get_theta64() const {
111
- return table_.theta_;
120
+ return is_empty() ? theta_constants::MAX_THETA : table_.theta_;
112
121
  }
113
122
 
114
123
  template<typename A>
@@ -202,6 +211,11 @@ void update_theta_sketch_alloc<A>::trim() {
202
211
  table_.trim();
203
212
  }
204
213
 
214
+ template<typename A>
215
+ void update_theta_sketch_alloc<A>::reset() {
216
+ table_.reset();
217
+ }
218
+
205
219
  template<typename A>
206
220
  auto update_theta_sketch_alloc<A>::begin() -> iterator {
207
221
  return iterator(table_.entries_, 1 << table_.lg_cur_size_, 0);
@@ -228,7 +242,7 @@ compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered
228
242
  }
229
243
 
230
244
  template<typename A>
231
- void update_theta_sketch_alloc<A>::print_specifics(ostrstream& os) const {
245
+ void update_theta_sketch_alloc<A>::print_specifics(std::ostringstream& os) const {
232
246
  os << " lg nominal size : " << static_cast<int>(table_.lg_nom_size_) << std::endl;
233
247
  os << " lg current size : " << static_cast<int>(table_.lg_cur_size_) << std::endl;
234
248
  os << " resize factor : " << (1 << table_.rf_) << std::endl;
@@ -241,7 +255,7 @@ update_theta_sketch_alloc<A>::builder::builder(const A& allocator): theta_base_b
241
255
 
242
256
  template<typename A>
243
257
  update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
244
- return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
258
+ return update_theta_sketch_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_);
245
259
  }
246
260
 
247
261
  // compact sketch
@@ -255,16 +269,18 @@ seed_hash_(other.get_seed_hash()),
255
269
  theta_(other.get_theta64()),
256
270
  entries_(other.get_allocator())
257
271
  {
258
- entries_.reserve(other.get_num_retained());
259
- std::copy(other.begin(), other.end(), std::back_inserter(entries_));
260
- if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
272
+ if (!other.is_empty()) {
273
+ entries_.reserve(other.get_num_retained());
274
+ std::copy(other.begin(), other.end(), std::back_inserter(entries_));
275
+ if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end());
276
+ }
261
277
  }
262
278
 
263
279
  template<typename A>
264
280
  compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
265
281
  std::vector<uint64_t, A>&& entries):
266
282
  is_empty_(is_empty),
267
- is_ordered_(is_ordered),
283
+ is_ordered_(is_ordered || (entries.size() <= 1ULL)),
268
284
  seed_hash_(seed_hash),
269
285
  theta_(theta),
270
286
  entries_(std::move(entries))
@@ -321,7 +337,7 @@ auto compact_theta_sketch_alloc<A>::end() const -> const_iterator {
321
337
  }
322
338
 
323
339
  template<typename A>
324
- void compact_theta_sketch_alloc<A>::print_specifics(ostrstream&) const {}
340
+ void compact_theta_sketch_alloc<A>::print_specifics(std::ostringstream&) const {}
325
341
 
326
342
  template<typename A>
327
343
  void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
@@ -400,78 +416,107 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::is
400
416
  const auto preamble_longs = read<uint8_t>(is);
401
417
  const auto serial_version = read<uint8_t>(is);
402
418
  const auto type = read<uint8_t>(is);
403
- read<uint16_t>(is); // unused
404
- const auto flags_byte = read<uint8_t>(is);
405
- const auto seed_hash = read<uint16_t>(is);
406
- checker<true>::check_sketch_type(type, SKETCH_TYPE);
407
- checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
408
- const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
409
- if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
410
-
411
- uint64_t theta = theta_constants::MAX_THETA;
412
- uint32_t num_entries = 0;
413
- if (!is_empty) {
414
- if (preamble_longs == 1) {
415
- num_entries = 1;
416
- } else {
417
- num_entries = read<uint32_t>(is);
419
+ switch (serial_version) {
420
+ case SERIAL_VERSION: {
421
+ read<uint16_t>(is); // unused
422
+ const auto flags_byte = read<uint8_t>(is);
423
+ const auto seed_hash = read<uint16_t>(is);
424
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
425
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
426
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
427
+ if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
428
+
429
+ uint64_t theta = theta_constants::MAX_THETA;
430
+ uint32_t num_entries = 0;
431
+ if (!is_empty) {
432
+ if (preamble_longs == 1) {
433
+ num_entries = 1;
434
+ } else {
435
+ num_entries = read<uint32_t>(is);
436
+ read<uint32_t>(is); // unused
437
+ if (preamble_longs > 2) {
438
+ theta = read<uint64_t>(is);
439
+ }
440
+ }
441
+ }
442
+ std::vector<uint64_t, A> entries(num_entries, 0, allocator);
443
+ if (!is_empty) read(is, entries.data(), sizeof(uint64_t) * entries.size());
444
+
445
+ const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
446
+ if (!is.good()) throw std::runtime_error("error reading from std::istream");
447
+ return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
448
+ }
449
+ case 1: {
450
+ const auto seed_hash = compute_seed_hash(seed);
451
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
452
+ read<uint8_t>(is); // unused
418
453
  read<uint32_t>(is); // unused
419
- if (preamble_longs > 2) {
420
- theta = read<uint64_t>(is);
454
+ const auto num_entries = read<uint32_t>(is);
455
+ read<uint32_t>(is); //unused
456
+ const auto theta = read<uint64_t>(is);
457
+ std::vector<uint64_t, A> entries(num_entries, 0, allocator);
458
+ bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
459
+ if (!is_empty)
460
+ read(is, entries.data(), sizeof(uint64_t) * entries.size());
461
+ if (!is.good())
462
+ throw std::runtime_error("error reading from std::istream");
463
+ return compact_theta_sketch_alloc(is_empty, true, seed_hash, theta, std::move(entries));
464
+ }
465
+ case 2: {
466
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
467
+ read<uint8_t>(is); // unused
468
+ read<uint16_t>(is); // unused
469
+ const uint16_t seed_hash = read<uint16_t>(is);
470
+ checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
471
+ if (preamble_longs == 1) {
472
+ if (!is.good())
473
+ throw std::runtime_error("error reading from std::istream");
474
+ std::vector<uint64_t, A> entries(0, 0, allocator);
475
+ return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
476
+ } else if (preamble_longs == 2) {
477
+ const uint32_t num_entries = read<uint32_t>(is);
478
+ read<uint32_t>(is); // unused
479
+ std::vector<uint64_t, A> entries(num_entries, 0, allocator);
480
+ if (num_entries == 0) {
481
+ return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
482
+ }
483
+ read(is, entries.data(), entries.size() * sizeof(uint64_t));
484
+ if (!is.good())
485
+ throw std::runtime_error("error reading from std::istream");
486
+ return compact_theta_sketch_alloc(false, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
487
+ } else if (preamble_longs == 3) {
488
+ const uint32_t num_entries = read<uint32_t>(is);
489
+ read<uint32_t>(is); // unused
490
+ const auto theta = read<uint64_t>(is);
491
+ bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
492
+ std::vector<uint64_t, A> entries(num_entries, 0, allocator);
493
+ if (is_empty) {
494
+ if (!is.good())
495
+ throw std::runtime_error("error reading from std::istream");
496
+ return compact_theta_sketch_alloc(true, true, seed_hash, theta, std::move(entries));
497
+ } else {
498
+ read(is, entries.data(), sizeof(uint64_t) * entries.size());
499
+ if (!is.good())
500
+ throw std::runtime_error("error reading from std::istream");
501
+ return compact_theta_sketch_alloc(false, true, seed_hash, theta, std::move(entries));
502
+ }
503
+ } else {
504
+ throw std::invalid_argument(std::to_string(preamble_longs) + " longs of premable, but expected 1, 2, or 3");
421
505
  }
422
- }
423
506
  }
424
- std::vector<uint64_t, A> entries(num_entries, 0, allocator);
425
- if (!is_empty) read(is, entries.data(), sizeof(uint64_t) * entries.size());
426
-
427
- const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
428
- if (!is.good()) throw std::runtime_error("error reading from std::istream");
429
- return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
507
+ default:
508
+ // this should always fail since the valid cases are handled above
509
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
510
+ // this throw is never reached, because check_serial_version will throw an informative exception.
511
+ // This is only here to avoid a compiler warning about a path without a return value.
512
+ throw std::invalid_argument("unexpected sketch serialization version");
513
+ }
430
514
  }
431
515
 
432
516
  template<typename A>
433
517
  compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
434
- ensure_minimum_memory(size, 8);
435
- const char* ptr = static_cast<const char*>(bytes);
436
- const char* base = ptr;
437
- uint8_t preamble_longs;
438
- ptr += copy_from_mem(ptr, preamble_longs);
439
- uint8_t serial_version;
440
- ptr += copy_from_mem(ptr, serial_version);
441
- uint8_t type;
442
- ptr += copy_from_mem(ptr, type);
443
- ptr += sizeof(uint16_t); // unused
444
- uint8_t flags_byte;
445
- ptr += copy_from_mem(ptr, flags_byte);
446
- uint16_t seed_hash;
447
- ptr += copy_from_mem(ptr, seed_hash);
448
- checker<true>::check_sketch_type(type, SKETCH_TYPE);
449
- checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
450
- const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
451
- if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
452
-
453
- uint64_t theta = theta_constants::MAX_THETA;
454
- uint32_t num_entries = 0;
455
- if (!is_empty) {
456
- if (preamble_longs == 1) {
457
- num_entries = 1;
458
- } else {
459
- ensure_minimum_memory(size, 8); // read the first prelong before this method
460
- ptr += copy_from_mem(ptr, num_entries);
461
- ptr += sizeof(uint32_t); // unused
462
- if (preamble_longs > 2) {
463
- ensure_minimum_memory(size, (preamble_longs - 1) << 3);
464
- ptr += copy_from_mem(ptr, theta);
465
- }
466
- }
467
- }
468
- const size_t entries_size_bytes = sizeof(uint64_t) * num_entries;
469
- check_memory_size(ptr - base + entries_size_bytes, size);
470
- std::vector<uint64_t, A> entries(num_entries, 0, allocator);
471
- if (!is_empty) ptr += copy_from_mem(ptr, entries.data(), entries_size_bytes);
472
-
473
- const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
474
- return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
518
+ auto data = compact_theta_sketch_parser<true>::parse(bytes, size, seed, false);
519
+ return compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.theta, std::vector<uint64_t, A>(data.entries, data.entries + data.num_entries, allocator));
475
520
  }
476
521
 
477
522
  // wrapped compact sketch
@@ -533,6 +578,18 @@ auto wrapped_compact_theta_sketch_alloc<A>::end() const -> const_iterator {
533
578
  return entries_ + num_entries_;
534
579
  }
535
580
 
581
+ template<typename A>
582
+ void wrapped_compact_theta_sketch_alloc<A>::print_specifics(std::ostringstream&) const {}
583
+
584
+ template<typename A>
585
+ void wrapped_compact_theta_sketch_alloc<A>::print_items(std::ostringstream& os) const {
586
+ os << "### Retained entries" << std::endl;
587
+ for (const auto& hash: *this) {
588
+ os << hash << std::endl;
589
+ }
590
+ os << "### End retained entries" << std::endl;
591
+ }
592
+
536
593
  } /* namespace datasketches */
537
594
 
538
595
  #endif
@@ -60,11 +60,16 @@ public:
60
60
  */
61
61
  CompactSketch get_result(bool ordered = true) const;
62
62
 
63
+ /**
64
+ * Reset the union to the initial empty state
65
+ */
66
+ void reset();
67
+
63
68
  private:
64
69
  State state_;
65
70
 
66
71
  // for builder
67
- theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Allocator& allocator);
72
+ theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Allocator& allocator);
68
73
  };
69
74
 
70
75
  template<typename A>
@@ -38,7 +38,7 @@ public:
38
38
  using resize_factor = typename hash_table::resize_factor;
39
39
  using comparator = compare_by_key<ExtractKey>;
40
40
 
41
- theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
41
+ theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
42
42
 
43
43
  template<typename FwdSketch>
44
44
  void update(FwdSketch&& sketch);
@@ -47,6 +47,8 @@ public:
47
47
 
48
48
  const Policy& get_policy() const;
49
49
 
50
+ void reset();
51
+
50
52
  private:
51
53
  Policy policy_;
52
54
  hash_table table_;
@@ -21,6 +21,7 @@
21
21
  #define THETA_UNION_BASE_IMPL_HPP_
22
22
 
23
23
  #include <algorithm>
24
+ #include <stdexcept>
24
25
 
25
26
  #include "conditional_forward.hpp"
26
27
 
@@ -28,9 +29,9 @@ namespace datasketches {
28
29
 
29
30
  template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
30
31
  theta_union_base<EN, EK, P, S, CS, A>::theta_union_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
31
- uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
32
+ float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
32
33
  policy_(policy),
33
- table_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator),
34
+ table_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator),
34
35
  union_theta_(table_.theta_)
35
36
  {}
36
37
 
@@ -84,6 +85,12 @@ const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy() const {
84
85
  return policy_;
85
86
  }
86
87
 
88
+ template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
89
+ void theta_union_base<EN, EK, P, S, CS, A>::reset() {
90
+ table_.reset();
91
+ union_theta_ = table_.theta_;
92
+ }
93
+
87
94
  } /* namespace datasketches */
88
95
 
89
96
  #endif
@@ -23,8 +23,8 @@
23
23
  namespace datasketches {
24
24
 
25
25
  template<typename A>
26
- theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator):
27
- state_(lg_cur_size, lg_nom_size, rf, theta, seed, nop_policy(), allocator)
26
+ theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const A& allocator):
27
+ state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, nop_policy(), allocator)
28
28
  {}
29
29
 
30
30
  template<typename A>
@@ -38,14 +38,17 @@ auto theta_union_alloc<A>::get_result(bool ordered) const -> CompactSketch {
38
38
  return state_.get_result(ordered);
39
39
  }
40
40
 
41
+ template<typename A>
42
+ void theta_union_alloc<A>::reset() {
43
+ state_.reset();
44
+ }
45
+
41
46
  template<typename A>
42
47
  theta_union_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
43
48
 
44
49
  template<typename A>
45
50
  auto theta_union_alloc<A>::builder::build() const -> theta_union_alloc {
46
- return theta_union_alloc(
47
- this->starting_sub_multiple(this->lg_k_ + 1, this->MIN_LG_K, static_cast<uint8_t>(this->rf_)),
48
- this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
51
+ return theta_union_alloc(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->allocator_);
49
52
  }
50
53
 
51
54
  } /* namespace datasketches */
@@ -40,8 +40,8 @@ struct theta_update_sketch_base {
40
40
  using resize_factor = theta_constants::resize_factor;
41
41
  using comparator = compare_by_key<ExtractKey>;
42
42
 
43
- theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
44
- uint64_t seed, const Allocator& allocator, bool is_empty = true);
43
+ theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p,
44
+ uint64_t theta, uint64_t seed, const Allocator& allocator, bool is_empty = true);
45
45
  theta_update_sketch_base(const theta_update_sketch_base& other);
46
46
  theta_update_sketch_base(theta_update_sketch_base&& other) noexcept;
47
47
  ~theta_update_sketch_base();
@@ -75,6 +75,7 @@ struct theta_update_sketch_base {
75
75
  uint8_t lg_cur_size_;
76
76
  uint8_t lg_nom_size_;
77
77
  resize_factor rf_;
78
+ float p_;
78
79
  uint32_t num_entries_;
79
80
  uint64_t theta_;
80
81
  uint64_t seed_;
@@ -83,6 +84,7 @@ struct theta_update_sketch_base {
83
84
  void resize();
84
85
  void rebuild();
85
86
  void trim();
87
+ void reset();
86
88
 
87
89
  static inline uint32_t get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size);
88
90
  static inline uint32_t get_stride(uint64_t key, uint8_t lg_size);
@@ -94,11 +96,14 @@ struct theta_update_sketch_base {
94
96
  template<typename Derived, typename Allocator>
95
97
  class theta_base_builder {
96
98
  public:
99
+ // TODO: Redundant and deprecated. Will be removed in next major version release.
97
100
  using resize_factor = theta_constants::resize_factor;
98
101
  static const uint8_t MIN_LG_K = theta_constants::MIN_LG_K;
99
102
  static const uint8_t MAX_LG_K = theta_constants::MAX_LG_K;
100
- static const uint8_t DEFAULT_LG_K = 12;
101
- static const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
103
+ // TODO: The following defaults are redundant and deprecated. Will be removed in the
104
+ // next major version release
105
+ static const uint8_t DEFAULT_LG_K = theta_constants::DEFAULT_LG_K;
106
+ static const resize_factor DEFAULT_RESIZE_FACTOR = theta_constants::DEFAULT_RESIZE_FACTOR;
102
107
 
103
108
  /**
104
109
  * Creates an instance of the builder with default parameters.
@@ -146,7 +151,6 @@ protected:
146
151
 
147
152
  uint64_t starting_theta() const;
148
153
  uint8_t starting_lg_size() const;
149
- static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf);
150
154
  };
151
155
 
152
156
  // key extractor
@@ -23,16 +23,20 @@
23
23
  #include <iostream>
24
24
  #include <sstream>
25
25
  #include <algorithm>
26
+ #include <stdexcept>
27
+
28
+ #include "theta_helpers.hpp"
26
29
 
27
30
  namespace datasketches {
28
31
 
29
32
  template<typename EN, typename EK, typename A>
30
- theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator, bool is_empty):
33
+ theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const A& allocator, bool is_empty):
31
34
  allocator_(allocator),
32
35
  is_empty_(is_empty),
33
36
  lg_cur_size_(lg_cur_size),
34
37
  lg_nom_size_(lg_nom_size),
35
38
  rf_(rf),
39
+ p_(p),
36
40
  num_entries_(0),
37
41
  theta_(theta),
38
42
  seed_(seed),
@@ -52,6 +56,7 @@ is_empty_(other.is_empty_),
52
56
  lg_cur_size_(other.lg_cur_size_),
53
57
  lg_nom_size_(other.lg_nom_size_),
54
58
  rf_(other.rf_),
59
+ p_(other.p_),
55
60
  num_entries_(other.num_entries_),
56
61
  theta_(other.theta_),
57
62
  seed_(other.seed_),
@@ -77,6 +82,7 @@ is_empty_(other.is_empty_),
77
82
  lg_cur_size_(other.lg_cur_size_),
78
83
  lg_nom_size_(other.lg_nom_size_),
79
84
  rf_(other.rf_),
85
+ p_(other.p_),
80
86
  num_entries_(other.num_entries_),
81
87
  theta_(other.theta_),
82
88
  seed_(other.seed_),
@@ -105,6 +111,7 @@ theta_update_sketch_base<EN, EK, A>& theta_update_sketch_base<EN, EK, A>::operat
105
111
  std::swap(lg_cur_size_, copy.lg_cur_size_);
106
112
  std::swap(lg_nom_size_, copy.lg_nom_size_);
107
113
  std::swap(rf_, copy.rf_);
114
+ std::swap(p_, copy.p_);
108
115
  std::swap(num_entries_, copy.num_entries_);
109
116
  std::swap(theta_, copy.theta_);
110
117
  std::swap(seed_, copy.seed_);
@@ -119,6 +126,7 @@ theta_update_sketch_base<EN, EK, A>& theta_update_sketch_base<EN, EK, A>::operat
119
126
  std::swap(lg_cur_size_, other.lg_cur_size_);
120
127
  std::swap(lg_nom_size_, other.lg_nom_size_);
121
128
  std::swap(rf_, other.rf_);
129
+ std::swap(p_, other.p_);
122
130
  std::swap(num_entries_, other.num_entries_);
123
131
  std::swap(theta_, other.theta_);
124
132
  std::swap(seed_, other.seed_);
@@ -247,6 +255,29 @@ void theta_update_sketch_base<EN, EK, A>::trim() {
247
255
  if (num_entries_ > static_cast<uint32_t>(1 << lg_nom_size_)) rebuild();
248
256
  }
249
257
 
258
+ template<typename EN, typename EK, typename A>
259
+ void theta_update_sketch_base<EN, EK, A>::reset() {
260
+ const size_t cur_size = 1ULL << lg_cur_size_;
261
+ for (size_t i = 0; i < cur_size; ++i) {
262
+ if (EK()(entries_[i]) != 0) {
263
+ entries_[i].~EN();
264
+ EK()(entries_[i]) = 0;
265
+ }
266
+ }
267
+ const uint8_t starting_lg_size = theta_build_helper<true>::starting_sub_multiple(
268
+ lg_nom_size_ + 1, theta_constants::MIN_LG_K, static_cast<uint8_t>(rf_));
269
+ if (starting_lg_size != lg_cur_size_) {
270
+ allocator_.deallocate(entries_, cur_size);
271
+ lg_cur_size_ = starting_lg_size;
272
+ const size_t new_size = 1ULL << starting_lg_size;
273
+ entries_ = allocator_.allocate(new_size);
274
+ for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
275
+ }
276
+ num_entries_ = 0;
277
+ theta_ = theta_build_helper<true>::starting_theta_from_p(p_);
278
+ is_empty_ = true;
279
+ }
280
+
250
281
  template<typename EN, typename EK, typename A>
251
282
  void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, size_t size, size_t num) {
252
283
  // find the first empty slot
@@ -271,7 +302,11 @@ void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, siz
271
302
 
272
303
  template<typename Derived, typename Allocator>
273
304
  theta_base_builder<Derived, Allocator>::theta_base_builder(const Allocator& allocator):
274
- allocator_(allocator), lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
305
+ allocator_(allocator),
306
+ lg_k_(theta_constants::DEFAULT_LG_K),
307
+ rf_(theta_constants::DEFAULT_RESIZE_FACTOR),
308
+ p_(1),
309
+ seed_(DEFAULT_SEED) {}
275
310
 
276
311
  template<typename Derived, typename Allocator>
277
312
  Derived& theta_base_builder<Derived, Allocator>::set_lg_k(uint8_t lg_k) {
@@ -306,18 +341,12 @@ Derived& theta_base_builder<Derived, Allocator>::set_seed(uint64_t seed) {
306
341
 
307
342
  template<typename Derived, typename Allocator>
308
343
  uint64_t theta_base_builder<Derived, Allocator>::starting_theta() const {
309
- if (p_ < 1) return static_cast<uint64_t>(theta_constants::MAX_THETA * p_);
310
- return theta_constants::MAX_THETA;
344
+ return theta_build_helper<true>::starting_theta_from_p(p_);
311
345
  }
312
346
 
313
347
  template<typename Derived, typename Allocator>
314
348
  uint8_t theta_base_builder<Derived, Allocator>::starting_lg_size() const {
315
- return starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_));
316
- }
317
-
318
- template<typename Derived, typename Allocator>
319
- uint8_t theta_base_builder<Derived, Allocator>::starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) {
320
- return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min;
349
+ return theta_build_helper<true>::starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_));
321
350
  }
322
351
 
323
352
  // iterator
@@ -43,4 +43,5 @@ target_sources(theta_test
43
43
  theta_intersection_test.cpp
44
44
  theta_a_not_b_test.cpp
45
45
  theta_jaccard_similarity_test.cpp
46
+ theta_setop_test.cpp
46
47
  )
@@ -21,6 +21,8 @@
21
21
 
22
22
  #include <theta_a_not_b.hpp>
23
23
 
24
+ #include <stdexcept>
25
+
24
26
  namespace datasketches {
25
27
 
26
28
  TEST_CASE("theta a-not-b: empty", "[theta_a_not_b]") {