datasketches 0.1.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  6. data/ext/datasketches/ext.cpp +1 -1
  7. data/ext/datasketches/ext.h +4 -0
  8. data/ext/datasketches/extconf.rb +1 -1
  9. data/ext/datasketches/fi_wrapper.cpp +6 -8
  10. data/ext/datasketches/hll_wrapper.cpp +13 -14
  11. data/ext/datasketches/kll_wrapper.cpp +28 -76
  12. data/ext/datasketches/theta_wrapper.cpp +27 -41
  13. data/ext/datasketches/vo_wrapper.cpp +4 -6
  14. data/lib/datasketches/version.rb +1 -1
  15. data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
  16. data/vendor/datasketches-cpp/LICENSE +40 -3
  17. data/vendor/datasketches-cpp/NOTICE +1 -1
  18. data/vendor/datasketches-cpp/README.md +4 -4
  19. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
  20. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  21. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  24. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  25. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  26. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  27. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  28. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  29. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
  31. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
  32. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
  33. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
  34. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
  35. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  36. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
  37. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
  38. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  42. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  43. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  46. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  47. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
  48. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  49. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
  50. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  51. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
  52. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  53. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
  54. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
  55. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  56. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
  57. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
  58. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
  59. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
  60. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
  61. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  62. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  63. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  64. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
  76. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
  77. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
  78. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
  79. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  80. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  81. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  82. data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
  83. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
  84. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
  85. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
  86. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  87. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
  88. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
  89. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  90. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  91. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  92. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  93. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  94. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  95. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
  96. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
  97. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
  98. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  99. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  100. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  101. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  102. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
  103. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  104. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  105. data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
  106. data/vendor/datasketches-cpp/python/README.md +52 -49
  107. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  108. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  109. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  110. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
  111. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
  112. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  113. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
  114. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  115. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  116. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
  117. data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
  118. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  119. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  120. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  121. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  122. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  123. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
  124. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  125. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
  126. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  127. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  128. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  129. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  130. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  131. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  132. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  133. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  134. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  135. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  136. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  137. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  138. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
  139. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
  140. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  141. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
  142. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  143. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  144. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  145. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
  146. data/vendor/datasketches-cpp/setup.py +11 -6
  147. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  148. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
  149. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  150. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  151. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  152. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  153. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  154. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  155. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
  156. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  157. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
  158. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  159. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  160. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  161. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  162. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
  163. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  164. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  165. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
  166. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
  167. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  168. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  169. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
  170. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  171. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
  172. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
  173. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  174. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  175. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  176. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
  177. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
  178. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  179. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  180. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  181. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  182. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  183. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
  184. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  185. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  186. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
  187. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
  188. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
  189. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
  190. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  191. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
  192. metadata +51 -36
  193. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  194. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  195. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  196. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  197. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  198. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  199. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  200. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  201. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  202. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  203. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  204. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  205. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -20,103 +20,70 @@
20
20
  #ifndef THETA_UNION_HPP_
21
21
  #define THETA_UNION_HPP_
22
22
 
23
- #include <memory>
24
- #include <functional>
25
- #include <climits>
26
-
23
+ #include "serde.hpp"
27
24
  #include "theta_sketch.hpp"
25
+ #include "theta_union_base.hpp"
28
26
 
29
27
  namespace datasketches {
30
28
 
31
- /*
32
- * author Alexander Saydakov
33
- * author Lee Rhodes
34
- * author Kevin Lang
35
- */
36
-
37
- template<typename A>
29
+ template<typename Allocator = std::allocator<uint64_t>>
38
30
  class theta_union_alloc {
39
31
  public:
40
- class builder;
32
+ using Entry = uint64_t;
33
+ using ExtractKey = trivial_extract_key;
34
+ using Sketch = theta_sketch_alloc<Allocator>;
35
+ using CompactSketch = compact_theta_sketch_alloc<Allocator>;
36
+ using resize_factor = theta_constants::resize_factor;
37
+
38
+ struct nop_policy {
39
+ void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
40
+ unused(internal_entry);
41
+ unused(incoming_entry);
42
+ }
43
+ };
44
+ using State = theta_union_base<Entry, ExtractKey, nop_policy, Sketch, CompactSketch, Allocator>;
41
45
 
42
46
  // No constructor here. Use builder instead.
47
+ class builder;
43
48
 
44
49
  /**
45
50
  * This method is to update the union with a given sketch
46
51
  * @param sketch to update the union with
47
52
  */
48
- void update(const theta_sketch_alloc<A>& sketch);
53
+ template<typename FwdSketch>
54
+ void update(FwdSketch&& sketch);
49
55
 
50
56
  /**
51
57
  * This method produces a copy of the current state of the union as a compact sketch.
52
58
  * @param ordered optional flag to specify if ordered sketch should be produced
53
59
  * @return the result of the union
54
60
  */
55
- compact_theta_sketch_alloc<A> get_result(bool ordered = true) const;
61
+ CompactSketch get_result(bool ordered = true) const;
56
62
 
57
63
  private:
58
- bool is_empty_;
59
- uint64_t theta_;
60
- update_theta_sketch_alloc<A> state_;
64
+ State state_;
61
65
 
62
66
  // for builder
63
- theta_union_alloc(uint64_t theta, update_theta_sketch_alloc<A>&& state);
67
+ theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Allocator& allocator);
64
68
  };
65
69
 
66
- // builder
67
-
68
70
  template<typename A>
69
- class theta_union_alloc<A>::builder {
71
+ class theta_union_alloc<A>::builder: public theta_base_builder<builder, A> {
70
72
  public:
71
- typedef typename update_theta_sketch_alloc<A>::resize_factor resize_factor;
72
-
73
- /**
74
- * Set log2(k), where k is a nominal number of entries in the sketch
75
- * @param lg_k base 2 logarithm of nominal number of entries
76
- * @return this builder
77
- */
78
- builder& set_lg_k(uint8_t lg_k);
79
-
80
- /**
81
- * Set resize factor for the internal hash table (defaults to 8)
82
- * @param rf resize factor
83
- * @return this builder
84
- */
85
- builder& set_resize_factor(resize_factor rf);
86
-
87
- /**
88
- * Set sampling probability (initial theta). The default is 1, so the sketch retains
89
- * all entries until it reaches the limit, at which point it goes into the estimation mode
90
- * and reduces the effective sampling probability (theta) as necessary.
91
- * @param p sampling probability
92
- * @return this builder
93
- */
94
- builder& set_p(float p);
95
-
96
- /**
97
- * Set the seed for the hash function. Should be used carefully if needed.
98
- * Sketches produced with different seed are not compatible
99
- * and cannot be mixed in set operations.
100
- * @param seed hash seed
101
- * @return this builder
102
- */
103
- builder& set_seed(uint64_t seed);
73
+ builder(const A& allocator = A());
104
74
 
105
75
  /**
106
76
  * This is to create an instance of the union with predefined parameters.
107
- * @return and instance of the union
77
+ * @return an instance of the union
108
78
  */
109
79
  theta_union_alloc<A> build() const;
110
-
111
- private:
112
- typename update_theta_sketch_alloc<A>::builder sketch_builder;
113
80
  };
114
81
 
115
82
  // alias with default allocator for convenience
116
- typedef theta_union_alloc<std::allocator<void>> theta_union;
83
+ using theta_union = theta_union_alloc<std::allocator<uint64_t>>;
117
84
 
118
85
  } /* namespace datasketches */
119
86
 
120
87
  #include "theta_union_impl.hpp"
121
88
 
122
- # endif
89
+ #endif
@@ -30,7 +30,7 @@ template<
30
30
  typename Policy,
31
31
  typename Sketch,
32
32
  typename CompactSketch,
33
- typename Allocator = std::allocator<Entry>
33
+ typename Allocator
34
34
  >
35
35
  class theta_union_base {
36
36
  public:
@@ -17,6 +17,9 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
+ #ifndef THETA_UNION_BASE_IMPL_HPP_
21
+ #define THETA_UNION_BASE_IMPL_HPP_
22
+
20
23
  #include <algorithm>
21
24
 
22
25
  #include "conditional_forward.hpp"
@@ -40,7 +43,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
40
43
  if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64();
41
44
  for (auto& entry: sketch) {
42
45
  const uint64_t hash = EK()(entry);
43
- if (hash < union_theta_) {
46
+ if (hash < union_theta_ && hash < table_.theta_) {
44
47
  auto result = table_.find(hash);
45
48
  if (!result.second) {
46
49
  table_.insert(result.first, conditional_forward<SS>(entry));
@@ -82,3 +85,5 @@ const P& theta_union_base<EN, EK, P, S, CS, A>::get_policy() const {
82
85
  }
83
86
 
84
87
  } /* namespace datasketches */
88
+
89
+ #endif
@@ -22,86 +22,30 @@
22
22
 
23
23
  namespace datasketches {
24
24
 
25
- /*
26
- * author Alexander Saydakov
27
- * author Lee Rhodes
28
- * author Kevin Lang
29
- */
30
-
31
- template<typename A>
32
- theta_union_alloc<A>::theta_union_alloc(uint64_t theta, update_theta_sketch_alloc<A>&& state):
33
- is_empty_(true), theta_(theta), state_(std::move(state)) {}
34
-
35
- template<typename A>
36
- void theta_union_alloc<A>::update(const theta_sketch_alloc<A>& sketch) {
37
- if (sketch.is_empty()) return;
38
- if (sketch.get_seed_hash() != state_.get_seed_hash()) throw std::invalid_argument("seed hash mismatch");
39
- is_empty_ = false;
40
- if (sketch.get_theta64() < theta_) theta_ = sketch.get_theta64();
41
- if (sketch.is_ordered()) {
42
- for (auto hash: sketch) {
43
- if (hash >= theta_) break; // early stop
44
- state_.internal_update(hash);
45
- }
46
- } else {
47
- for (auto hash: sketch) if (hash < theta_) state_.internal_update(hash);
48
- }
49
- if (state_.get_theta64() < theta_) theta_ = state_.get_theta64();
50
- }
51
-
52
25
  template<typename A>
53
- compact_theta_sketch_alloc<A> theta_union_alloc<A>::get_result(bool ordered) const {
54
- if (is_empty_) return state_.compact(ordered);
55
- const uint32_t nom_num_keys = 1 << state_.lg_nom_size_;
56
- if (theta_ >= state_.theta_ && state_.get_num_retained() <= nom_num_keys) return state_.compact(ordered);
57
- uint64_t theta = std::min(theta_, state_.get_theta64());
58
- vector_u64<A> keys(state_.get_num_retained());
59
- uint32_t num_keys = 0;
60
- for (auto key: state_) {
61
- if (key < theta) keys[num_keys++] = key;
62
- }
63
- if (num_keys > nom_num_keys) {
64
- std::nth_element(keys.begin(), keys.begin() + nom_num_keys, keys.begin() + num_keys);
65
- theta = keys[nom_num_keys];
66
- num_keys = nom_num_keys;
67
- }
68
- if (num_keys != state_.get_num_retained()) {
69
- keys.resize(num_keys);
70
- }
71
- if (ordered) std::sort(keys.begin(), keys.end());
72
- return compact_theta_sketch_alloc<A>(false, theta, std::move(keys), state_.get_seed_hash(), ordered);
73
- }
74
-
75
- // builder
26
+ theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator):
27
+ state_(lg_cur_size, lg_nom_size, rf, theta, seed, nop_policy(), allocator)
28
+ {}
76
29
 
77
30
  template<typename A>
78
- typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_lg_k(uint8_t lg_k) {
79
- sketch_builder.set_lg_k(lg_k);
80
- return *this;
31
+ template<typename SS>
32
+ void theta_union_alloc<A>::update(SS&& sketch) {
33
+ state_.update(std::forward<SS>(sketch));
81
34
  }
82
35
 
83
36
  template<typename A>
84
- typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_resize_factor(resize_factor rf) {
85
- sketch_builder.set_resize_factor(rf);
86
- return *this;
37
+ auto theta_union_alloc<A>::get_result(bool ordered) const -> CompactSketch {
38
+ return state_.get_result(ordered);
87
39
  }
88
40
 
89
41
  template<typename A>
90
- typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_p(float p) {
91
- sketch_builder.set_p(p);
92
- return *this;
93
- }
94
-
95
- template<typename A>
96
- typename theta_union_alloc<A>::builder& theta_union_alloc<A>::builder::set_seed(uint64_t seed) {
97
- sketch_builder.set_seed(seed);
98
- return *this;
99
- }
42
+ theta_union_alloc<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
100
43
 
101
44
  template<typename A>
102
- theta_union_alloc<A> theta_union_alloc<A>::builder::build() const {
103
- update_theta_sketch_alloc<A> sketch = sketch_builder.build();
104
- return theta_union_alloc(sketch.get_theta64(), std::move(sketch));
45
+ auto theta_union_alloc<A>::builder::build() const -> theta_union_alloc {
46
+ return theta_union_alloc(
47
+ this->starting_sub_multiple(this->lg_k_ + 1, this->MIN_LG_K, static_cast<uint8_t>(this->rf_)),
48
+ this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
105
49
  }
106
50
 
107
51
  } /* namespace datasketches */
@@ -34,7 +34,7 @@ namespace datasketches {
34
34
  template<
35
35
  typename Entry,
36
36
  typename ExtractKey,
37
- typename Allocator = std::allocator<Entry>
37
+ typename Allocator
38
38
  >
39
39
  struct theta_update_sketch_base {
40
40
  using resize_factor = theta_constants::resize_factor;
@@ -53,6 +53,8 @@ struct theta_update_sketch_base {
53
53
  inline uint64_t hash_and_screen(const void* data, size_t length);
54
54
 
55
55
  inline std::pair<iterator, bool> find(uint64_t key) const;
56
+ static inline std::pair<iterator, bool> find(Entry* entries, uint8_t lg_size, uint64_t key);
57
+
56
58
 
57
59
  template<typename FwdEntry>
58
60
  inline void insert(iterator it, FwdEntry&& entry);
@@ -92,11 +94,14 @@ struct theta_update_sketch_base {
92
94
  template<typename Derived, typename Allocator>
93
95
  class theta_base_builder {
94
96
  public:
97
+ // TODO: Redundant and deprecated. Will be removed in next major version release.
95
98
  using resize_factor = theta_constants::resize_factor;
96
99
  static const uint8_t MIN_LG_K = theta_constants::MIN_LG_K;
97
100
  static const uint8_t MAX_LG_K = theta_constants::MAX_LG_K;
98
- static const uint8_t DEFAULT_LG_K = 12;
99
- static const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
101
+ // TODO: The following defaults are redundant and deprecated. Will be removed in the
102
+ // next major version release
103
+ static const uint8_t DEFAULT_LG_K = theta_constants::DEFAULT_LG_K;
104
+ static const resize_factor DEFAULT_RESIZE_FACTOR = theta_constants::DEFAULT_RESIZE_FACTOR;
100
105
 
101
106
  /**
102
107
  * Creates an instance of the builder with default parameters.
@@ -147,7 +152,7 @@ protected:
147
152
  static uint8_t starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf);
148
153
  };
149
154
 
150
- // key extractors
155
+ // key extractor
151
156
 
152
157
  struct trivial_extract_key {
153
158
  template<typename T>
@@ -156,17 +161,7 @@ struct trivial_extract_key {
156
161
  }
157
162
  };
158
163
 
159
- template<typename K, typename V>
160
- struct pair_extract_key {
161
- K& operator()(std::pair<K, V>& entry) const {
162
- return entry.first;
163
- }
164
- const K& operator()(const std::pair<K, V>& entry) const {
165
- return entry.first;
166
- }
167
- };
168
-
169
- // not zero
164
+ // key not zero
170
165
 
171
166
  template<typename Entry, typename ExtractKey>
172
167
  class key_not_zero {
@@ -195,12 +190,6 @@ static inline uint64_t compute_hash(const void* data, size_t length, uint64_t se
195
190
  return (hashes.h1 >> 1); // Java implementation does unsigned shift >>> to make values positive
196
191
  }
197
192
 
198
- static inline uint16_t compute_seed_hash(uint64_t seed) {
199
- HashState hashes;
200
- MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
201
- return hashes.h1;
202
- }
203
-
204
193
  // iterators
205
194
 
206
195
  template<typename Entry, typename ExtractKey>
@@ -17,6 +17,9 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
+ #ifndef THETA_UPDATE_SKETCH_BASE_IMPL_HPP_
21
+ #define THETA_UPDATE_SKETCH_BASE_IMPL_HPP_
22
+
20
23
  #include <iostream>
21
24
  #include <sstream>
22
25
  #include <algorithm>
@@ -36,7 +39,7 @@ seed_(seed),
36
39
  entries_(nullptr)
37
40
  {
38
41
  if (lg_cur_size > 0) {
39
- const size_t size = 1 << lg_cur_size;
42
+ const size_t size = 1ULL << lg_cur_size;
40
43
  entries_ = allocator_.allocate(size);
41
44
  for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
42
45
  }
@@ -55,7 +58,7 @@ seed_(other.seed_),
55
58
  entries_(nullptr)
56
59
  {
57
60
  if (other.entries_ != nullptr) {
58
- const size_t size = 1 << lg_cur_size_;
61
+ const size_t size = 1ULL << lg_cur_size_;
59
62
  entries_ = allocator_.allocate(size);
60
63
  for (size_t i = 0; i < size; ++i) {
61
64
  if (EK()(other.entries_[i]) != 0) {
@@ -69,7 +72,7 @@ entries_(nullptr)
69
72
 
70
73
  template<typename EN, typename EK, typename A>
71
74
  theta_update_sketch_base<EN, EK, A>::theta_update_sketch_base(theta_update_sketch_base&& other) noexcept:
72
- allocator_(other.allocator_),
75
+ allocator_(std::move(other.allocator_)),
73
76
  is_empty_(other.is_empty_),
74
77
  lg_cur_size_(other.lg_cur_size_),
75
78
  lg_nom_size_(other.lg_nom_size_),
@@ -86,7 +89,7 @@ template<typename EN, typename EK, typename A>
86
89
  theta_update_sketch_base<EN, EK, A>::~theta_update_sketch_base()
87
90
  {
88
91
  if (entries_ != nullptr) {
89
- const size_t size = 1 << lg_cur_size_;
92
+ const size_t size = 1ULL << lg_cur_size_;
90
93
  for (size_t i = 0; i < size; ++i) {
91
94
  if (EK()(entries_[i]) != 0) entries_[i].~EN();
92
95
  }
@@ -133,18 +136,23 @@ uint64_t theta_update_sketch_base<EN, EK, A>::hash_and_screen(const void* data,
133
136
 
134
137
  template<typename EN, typename EK, typename A>
135
138
  auto theta_update_sketch_base<EN, EK, A>::find(uint64_t key) const -> std::pair<iterator, bool> {
136
- const size_t size = 1 << lg_cur_size_;
137
- const size_t mask = size - 1;
138
- const uint32_t stride = get_stride(key, lg_cur_size_);
139
+ return find(entries_, lg_cur_size_, key);
140
+ }
141
+
142
+ template<typename EN, typename EK, typename A>
143
+ auto theta_update_sketch_base<EN, EK, A>::find(EN* entries, uint8_t lg_size, uint64_t key) -> std::pair<iterator, bool> {
144
+ const uint32_t size = 1 << lg_size;
145
+ const uint32_t mask = size - 1;
146
+ const uint32_t stride = get_stride(key, lg_size);
139
147
  uint32_t index = static_cast<uint32_t>(key) & mask;
140
148
  // search for duplicate or zero
141
149
  const uint32_t loop_index = index;
142
150
  do {
143
- const uint64_t probe = EK()(entries_[index]);
151
+ const uint64_t probe = EK()(entries[index]);
144
152
  if (probe == 0) {
145
- return std::pair<iterator, bool>(&entries_[index], false);
153
+ return std::pair<iterator, bool>(&entries[index], false);
146
154
  } else if (probe == key) {
147
- return std::pair<iterator, bool>(&entries_[index], true);
155
+ return std::pair<iterator, bool>(&entries[index], true);
148
156
  }
149
157
  index = (index + stride) & mask;
150
158
  } while (index != loop_index);
@@ -172,13 +180,13 @@ auto theta_update_sketch_base<EN, EK, A>::begin() const -> iterator {
172
180
 
173
181
  template<typename EN, typename EK, typename A>
174
182
  auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
175
- return &entries_[1 << lg_cur_size_];
183
+ return &entries_[1ULL << lg_cur_size_];
176
184
  }
177
185
 
178
186
  template<typename EN, typename EK, typename A>
179
187
  uint32_t theta_update_sketch_base<EN, EK, A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
180
188
  const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
181
- return std::floor(fraction * (1 << lg_cur_size));
189
+ return static_cast<uint32_t>(std::floor(fraction * (1 << lg_cur_size)));
182
190
  }
183
191
 
184
192
  template<typename EN, typename EK, typename A>
@@ -189,29 +197,29 @@ uint32_t theta_update_sketch_base<EN, EK, A>::get_stride(uint64_t key, uint8_t l
189
197
 
190
198
  template<typename EN, typename EK, typename A>
191
199
  void theta_update_sketch_base<EN, EK, A>::resize() {
192
- const size_t old_size = 1 << lg_cur_size_;
193
- const uint8_t lg_tgt_size = lg_nom_size_ + 1;
194
- const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_));
195
- lg_cur_size_ += factor;
196
- const size_t new_size = 1 << lg_cur_size_;
197
- EN* old_entries = entries_;
198
- entries_ = allocator_.allocate(new_size);
199
- for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
200
- num_entries_ = 0;
200
+ const size_t old_size = 1ULL << lg_cur_size_;
201
+ const uint8_t lg_new_size = std::min<uint8_t>(lg_cur_size_ + static_cast<uint8_t>(rf_), lg_nom_size_ + 1);
202
+ const size_t new_size = 1ULL << lg_new_size;
203
+ EN* new_entries = allocator_.allocate(new_size);
204
+ for (size_t i = 0; i < new_size; ++i) EK()(new_entries[i]) = 0;
201
205
  for (size_t i = 0; i < old_size; ++i) {
202
- const uint64_t key = EK()(old_entries[i]);
206
+ const uint64_t key = EK()(entries_[i]);
203
207
  if (key != 0) {
204
- insert(find(key).first, std::move(old_entries[i])); // consider a special insert with no comparison
205
- old_entries[i].~EN();
208
+ // always finds an empty slot in a larger table
209
+ new (find(new_entries, lg_new_size, key).first) EN(std::move(entries_[i]));
210
+ entries_[i].~EN();
211
+ EK()(entries_[i]) = 0;
206
212
  }
207
213
  }
208
- allocator_.deallocate(old_entries, old_size);
214
+ std::swap(entries_, new_entries);
215
+ lg_cur_size_ = lg_new_size;
216
+ allocator_.deallocate(new_entries, old_size);
209
217
  }
210
218
 
211
219
  // assumes number of entries > nominal size
212
220
  template<typename EN, typename EK, typename A>
213
221
  void theta_update_sketch_base<EN, EK, A>::rebuild() {
214
- const size_t size = 1 << lg_cur_size_;
222
+ const size_t size = 1ULL << lg_cur_size_;
215
223
  const uint32_t nominal_size = 1 << lg_nom_size_;
216
224
 
217
225
  // empty entries have uninitialized payloads
@@ -224,10 +232,10 @@ void theta_update_sketch_base<EN, EK, A>::rebuild() {
224
232
  const size_t num_old_entries = num_entries_;
225
233
  entries_ = allocator_.allocate(size);
226
234
  for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
227
- num_entries_ = 0;
235
+ num_entries_ = nominal_size;
228
236
  // relies on consolidating non-empty entries to the front
229
237
  for (size_t i = 0; i < nominal_size; ++i) {
230
- insert(find(EK()(old_entries[i])).first, std::move(old_entries[i])); // consider a special insert with no comparison
238
+ new (find(EK()(old_entries[i])).first) EN(std::move(old_entries[i]));
231
239
  old_entries[i].~EN();
232
240
  }
233
241
  for (size_t i = nominal_size; i < num_old_entries; ++i) old_entries[i].~EN();
@@ -263,7 +271,11 @@ void theta_update_sketch_base<EN, EK, A>::consolidate_non_empty(EN* entries, siz
263
271
 
264
272
  template<typename Derived, typename Allocator>
265
273
  theta_base_builder<Derived, Allocator>::theta_base_builder(const Allocator& allocator):
266
- allocator_(allocator), lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
274
+ allocator_(allocator),
275
+ lg_k_(theta_constants::DEFAULT_LG_K),
276
+ rf_(theta_constants::DEFAULT_RESIZE_FACTOR),
277
+ p_(1),
278
+ seed_(DEFAULT_SEED) {}
267
279
 
268
280
  template<typename Derived, typename Allocator>
269
281
  Derived& theta_base_builder<Derived, Allocator>::set_lg_k(uint8_t lg_k) {
@@ -298,7 +310,7 @@ Derived& theta_base_builder<Derived, Allocator>::set_seed(uint64_t seed) {
298
310
 
299
311
  template<typename Derived, typename Allocator>
300
312
  uint64_t theta_base_builder<Derived, Allocator>::starting_theta() const {
301
- if (p_ < 1) return theta_constants::MAX_THETA * p_;
313
+ if (p_ < 1) return static_cast<uint64_t>(theta_constants::MAX_THETA * p_);
302
314
  return theta_constants::MAX_THETA;
303
315
  }
304
316
 
@@ -387,3 +399,5 @@ auto theta_const_iterator<Entry, ExtractKey>::operator*() const -> const Entry&
387
399
  }
388
400
 
389
401
  } /* namespace datasketches */
402
+
403
+ #endif
@@ -42,4 +42,5 @@ target_sources(theta_test
42
42
  theta_union_test.cpp
43
43
  theta_intersection_test.cpp
44
44
  theta_a_not_b_test.cpp
45
+ theta_jaccard_similarity_test.cpp
45
46
  )
@@ -37,7 +37,7 @@ TEST_CASE("theta a-not-b: empty", "[theta_a_not_b]") {
37
37
  TEST_CASE("theta a-not-b: non empty no retained keys", "[theta_a_not_b]") {
38
38
  update_theta_sketch a = update_theta_sketch::builder().build();
39
39
  a.update(1);
40
- update_theta_sketch b = update_theta_sketch::builder().set_p(0.001).build();
40
+ update_theta_sketch b = update_theta_sketch::builder().set_p(0.001f).build();
41
41
  theta_a_not_b a_not_b;
42
42
 
43
43
  // B is still empty
@@ -167,6 +167,28 @@ TEST_CASE("theta a-not-b: estimation mode half overlap", "[theta_a_not_b]") {
167
167
  REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
168
168
  }
169
169
 
170
+ TEST_CASE("theta a-not-b: estimation mode half overlap wrapped compact", "[theta_a_not_b]") {
171
+ update_theta_sketch a = update_theta_sketch::builder().build();
172
+ int value = 0;
173
+ for (int i = 0; i < 10000; i++) a.update(value++);
174
+ auto bytes_a = a.compact().serialize();
175
+
176
+ update_theta_sketch b = update_theta_sketch::builder().build();
177
+ value = 5000;
178
+ for (int i = 0; i < 10000; i++) b.update(value++);
179
+ auto bytes_b = b.compact().serialize();
180
+
181
+ theta_a_not_b a_not_b;
182
+
183
+ auto result = a_not_b.compute(
184
+ wrapped_compact_theta_sketch::wrap(bytes_a.data(), bytes_a.size()),
185
+ wrapped_compact_theta_sketch::wrap(bytes_b.data(), bytes_b.size())
186
+ );
187
+ REQUIRE_FALSE(result.is_empty());
188
+ REQUIRE(result.is_estimation_mode());
189
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
190
+ }
191
+
170
192
  TEST_CASE("theta a-not-b: estimation mode disjoint", "[theta_a_not_b]") {
171
193
  update_theta_sketch a = update_theta_sketch::builder().build();
172
194
  int value = 0;
@@ -48,7 +48,7 @@ TEST_CASE("theta intersection: empty", "[theta_intersection]") {
48
48
  }
49
49
 
50
50
  TEST_CASE("theta intersection: non empty no retained keys", "[theta_intersection]") {
51
- update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001).build();
51
+ update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001f).build();
52
52
  sketch.update(1);
53
53
  theta_intersection intersection;
54
54
  intersection.update(sketch);
@@ -174,6 +174,26 @@ TEST_CASE("theta intersection: estimation mode half overlap ordered", "[theta_in
174
174
  REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
175
175
  }
176
176
 
177
+ TEST_CASE("theta intersection: estimation mode half overlap ordered wrapped compact", "[theta_intersection]") {
178
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
179
+ int value = 0;
180
+ for (int i = 0; i < 10000; i++) sketch1.update(value++);
181
+ auto bytes1 = sketch1.compact().serialize();
182
+
183
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
184
+ value = 5000;
185
+ for (int i = 0; i < 10000; i++) sketch2.update(value++);
186
+ auto bytes2 = sketch2.compact().serialize();
187
+
188
+ theta_intersection intersection;
189
+ intersection.update(wrapped_compact_theta_sketch::wrap(bytes1.data(), bytes1.size()));
190
+ intersection.update(wrapped_compact_theta_sketch::wrap(bytes2.data(), bytes2.size()));
191
+ compact_theta_sketch result = intersection.get_result();
192
+ REQUIRE_FALSE(result.is_empty());
193
+ REQUIRE(result.is_estimation_mode());
194
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
195
+ }
196
+
177
197
  TEST_CASE("theta intersection: estimation mode disjoint unordered", "[theta_intersection]") {
178
198
  update_theta_sketch sketch1 = update_theta_sketch::builder().build();
179
199
  int value = 0;