datasketches 0.2.0 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +7 -7
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  17. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  18. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  19. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  20. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  21. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  22. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  26. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
  31. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  32. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  33. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  34. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  35. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  36. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  37. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  38. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  39. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  40. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
  41. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  42. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  43. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  44. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  45. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  46. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  47. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  48. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  49. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  50. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  51. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  52. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  53. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  54. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  55. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  56. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  57. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  58. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  59. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  60. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  61. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  62. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
  63. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  64. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  65. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  66. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  67. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  68. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  69. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  70. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  71. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  72. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  73. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  74. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  75. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  76. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  77. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  78. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  79. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  80. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  81. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  82. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  83. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  84. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  85. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
  86. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
  87. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  88. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  89. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  90. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
  91. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  92. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  93. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  94. data/vendor/datasketches-cpp/python/README.md +50 -50
  95. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  96. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  97. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  98. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  99. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
  100. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  101. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  102. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  103. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  104. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  105. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  106. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  107. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  108. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  109. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  110. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  111. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  112. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
  113. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  114. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
  117. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
  118. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  119. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  120. data/vendor/datasketches-cpp/setup.py +10 -7
  121. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  122. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  124. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
  125. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  126. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  127. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  128. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  129. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  130. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  131. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  132. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
  133. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
  134. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
  135. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
  137. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
  139. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
  140. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  141. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  142. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  144. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  145. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  146. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  147. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  148. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  149. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  150. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
  151. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
  152. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  153. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  154. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
  155. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  157. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  158. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
  159. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  160. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  161. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
  162. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  163. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  164. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  165. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  166. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
  167. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
  168. metadata +18 -7
  169. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  170. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -17,8 +17,10 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
+ #include <istream>
20
21
  #include <fstream>
21
22
  #include <sstream>
23
+ #include <vector>
22
24
 
23
25
  #include <catch.hpp>
24
26
  #include <theta_sketch.hpp>
@@ -39,6 +41,7 @@ TEST_CASE("theta sketch: empty", "[theta_sketch]") {
39
41
  REQUIRE(update_sketch.get_estimate() == 0.0);
40
42
  REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
41
43
  REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
44
+ REQUIRE(update_sketch.is_ordered());
42
45
 
43
46
  compact_theta_sketch compact_sketch = update_sketch.compact();
44
47
  REQUIRE(compact_sketch.is_empty());
@@ -47,10 +50,14 @@ TEST_CASE("theta sketch: empty", "[theta_sketch]") {
47
50
  REQUIRE(compact_sketch.get_estimate() == 0.0);
48
51
  REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
49
52
  REQUIRE(compact_sketch.get_upper_bound(1) == 0.0);
53
+ REQUIRE(compact_sketch.is_ordered());
54
+
55
+ // empty is forced to be ordered
56
+ REQUIRE(update_sketch.compact(false).is_ordered());
50
57
  }
51
58
 
52
59
  TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
53
- update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001).build();
60
+ update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001f).build();
54
61
  update_sketch.update(1);
55
62
  //std::cerr << update_sketch.to_string();
56
63
  REQUIRE(update_sketch.get_num_retained() == 0);
@@ -67,6 +74,14 @@ TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
67
74
  REQUIRE(compact_sketch.get_estimate() == 0.0);
68
75
  REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
69
76
  REQUIRE(compact_sketch.get_upper_bound(1) > 0);
77
+
78
+ update_sketch.reset();
79
+ REQUIRE(update_sketch.is_empty());
80
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
81
+ REQUIRE(update_sketch.get_theta() == 1.0);
82
+ REQUIRE(update_sketch.get_estimate() == 0.0);
83
+ REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
84
+ REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
70
85
  }
71
86
 
72
87
  TEST_CASE("theta sketch: single item", "[theta_sketch]") {
@@ -78,6 +93,7 @@ TEST_CASE("theta sketch: single item", "[theta_sketch]") {
78
93
  REQUIRE(update_sketch.get_estimate() == 1.0);
79
94
  REQUIRE(update_sketch.get_lower_bound(1) == 1.0);
80
95
  REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
96
+ REQUIRE(update_sketch.is_ordered()); // one item is ordered
81
97
 
82
98
  compact_theta_sketch compact_sketch = update_sketch.compact();
83
99
  REQUIRE_FALSE(compact_sketch.is_empty());
@@ -86,6 +102,10 @@ TEST_CASE("theta sketch: single item", "[theta_sketch]") {
86
102
  REQUIRE(compact_sketch.get_estimate() == 1.0);
87
103
  REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
88
104
  REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
105
+ REQUIRE(compact_sketch.is_ordered());
106
+
107
+ // single item is forced to be ordered
108
+ REQUIRE(update_sketch.compact(false).is_ordered());
89
109
  }
90
110
 
91
111
  TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
@@ -97,6 +117,7 @@ TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
97
117
  REQUIRE(update_sketch.get_estimate() == 2000.0);
98
118
  REQUIRE(update_sketch.get_lower_bound(1) == 2000.0);
99
119
  REQUIRE(update_sketch.get_upper_bound(1) == 2000.0);
120
+ REQUIRE_FALSE(update_sketch.is_ordered());
100
121
 
101
122
  compact_theta_sketch compact_sketch = update_sketch.compact();
102
123
  REQUIRE_FALSE(compact_sketch.is_empty());
@@ -105,6 +126,17 @@ TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
105
126
  REQUIRE(compact_sketch.get_estimate() == 2000.0);
106
127
  REQUIRE(compact_sketch.get_lower_bound(1) == 2000.0);
107
128
  REQUIRE(compact_sketch.get_upper_bound(1) == 2000.0);
129
+ REQUIRE(compact_sketch.is_ordered());
130
+
131
+ update_sketch.reset();
132
+ REQUIRE(update_sketch.is_empty());
133
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
134
+ REQUIRE(update_sketch.get_theta() == 1.0);
135
+ REQUIRE(update_sketch.get_estimate() == 0.0);
136
+ REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
137
+ REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
138
+ REQUIRE(update_sketch.is_ordered());
139
+
108
140
  }
109
141
 
110
142
  TEST_CASE("theta sketch: estimation", "[theta_sketch]") {
@@ -148,6 +180,34 @@ TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]")
148
180
  REQUIRE(sketch.get_upper_bound(1) == 0.0);
149
181
  }
150
182
 
183
+ TEST_CASE("theta sketch: deserialize compact v1 empty from java", "[theta_sketch]") {
184
+ std::ifstream is;
185
+ is.exceptions(std::ios::failbit | std::ios::badbit);
186
+ is.open(inputPath + "theta_compact_empty_from_java_v1.sk", std::ios::binary);
187
+ auto sketch = compact_theta_sketch::deserialize(is);
188
+ REQUIRE(sketch.is_empty());
189
+ REQUIRE_FALSE(sketch.is_estimation_mode());
190
+ REQUIRE(sketch.get_num_retained() == 0);
191
+ REQUIRE(sketch.get_theta() == 1.0);
192
+ REQUIRE(sketch.get_estimate() == 0.0);
193
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
194
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
195
+ }
196
+
197
+ TEST_CASE("theta sketch: deserialize compact v2 empty from java", "[theta_sketch]") {
198
+ std::ifstream is;
199
+ is.exceptions(std::ios::failbit | std::ios::badbit);
200
+ is.open(inputPath + "theta_compact_empty_from_java_v2.sk", std::ios::binary);
201
+ auto sketch = compact_theta_sketch::deserialize(is);
202
+ REQUIRE(sketch.is_empty());
203
+ REQUIRE_FALSE(sketch.is_estimation_mode());
204
+ REQUIRE(sketch.get_num_retained() == 0);
205
+ REQUIRE(sketch.get_theta() == 1.0);
206
+ REQUIRE(sketch.get_estimate() == 0.0);
207
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
208
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
209
+ }
210
+
151
211
  TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
152
212
  std::ifstream is;
153
213
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -162,6 +222,38 @@ TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
162
222
  REQUIRE(sketch.get_upper_bound(1) == 1.0);
163
223
  }
164
224
 
225
+ TEST_CASE("theta sketch: deserialize compact exact from java", "[theta_sketch]") {
226
+ std::ifstream is;
227
+ is.exceptions(std::ios::failbit | std::ios::badbit);
228
+ is.open(inputPath + "theta_compact_exact_from_java.sk", std::ios::binary);
229
+ auto sketch = compact_theta_sketch::deserialize(is);
230
+ REQUIRE_FALSE(sketch.is_empty());
231
+ REQUIRE_FALSE(sketch.is_estimation_mode());
232
+ REQUIRE(sketch.is_ordered());
233
+ REQUIRE(sketch.get_num_retained() == 100);
234
+
235
+ // the same construction process in Java must have produced exactly the same sketch
236
+ auto update_sketch = update_theta_sketch::builder().build();
237
+ const int n = 100;
238
+ for (int i = 0; i < n; i++) update_sketch.update(i);
239
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
240
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
241
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
242
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
243
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
244
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
245
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
246
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
247
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
248
+ compact_theta_sketch compact_sketch = update_sketch.compact();
249
+ // the sketches are ordered, so the iteration sequence must match exactly
250
+ auto iter = sketch.begin();
251
+ for (const auto& key: compact_sketch) {
252
+ REQUIRE(*iter == key);
253
+ ++iter;
254
+ }
255
+ }
256
+
165
257
  TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") {
166
258
  std::ifstream is;
167
259
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -198,6 +290,78 @@ TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sket
198
290
  }
199
291
  }
200
292
 
293
+ TEST_CASE("theta sketch: deserialize compact v1 estimation from java", "[theta_sketch]") {
294
+ std::ifstream is;
295
+ is.exceptions(std::ios::failbit | std::ios::badbit);
296
+ is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary);
297
+ auto sketch = compact_theta_sketch::deserialize(is);
298
+ REQUIRE_FALSE(sketch.is_empty());
299
+ REQUIRE(sketch.is_estimation_mode());
300
+ REQUIRE(sketch.is_ordered());
301
+ REQUIRE(sketch.get_num_retained() == 4342);
302
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
303
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
304
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
305
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
306
+
307
+ // the same construction process in Java must have produced exactly the same sketch
308
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
309
+ const int n = 8192;
310
+ for (int i = 0; i < n; i++) update_sketch.update(i);
311
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
312
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
313
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
314
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
315
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
316
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
317
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
318
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
319
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
320
+ compact_theta_sketch compact_sketch = update_sketch.compact();
321
+ // the sketches are ordered, so the iteration sequence must match exactly
322
+ auto iter = sketch.begin();
323
+ for (const auto& key: compact_sketch) {
324
+ REQUIRE(*iter == key);
325
+ ++iter;
326
+ }
327
+ }
328
+
329
+ TEST_CASE("theta sketch: deserialize compact v2 estimation from java", "[theta_sketch]") {
330
+ std::ifstream is;
331
+ is.exceptions(std::ios::failbit | std::ios::badbit);
332
+ is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary);
333
+ auto sketch = compact_theta_sketch::deserialize(is);
334
+ REQUIRE_FALSE(sketch.is_empty());
335
+ REQUIRE(sketch.is_estimation_mode());
336
+ REQUIRE(sketch.is_ordered());
337
+ REQUIRE(sketch.get_num_retained() == 4342);
338
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
339
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
340
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
341
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
342
+
343
+ // the same construction process in Java must have produced exactly the same sketch
344
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
345
+ const int n = 8192;
346
+ for (int i = 0; i < n; i++) update_sketch.update(i);
347
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
348
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
349
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
350
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
351
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
352
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
353
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
354
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
355
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
356
+ compact_theta_sketch compact_sketch = update_sketch.compact();
357
+ // the sketches are ordered, so the iteration sequence must match exactly
358
+ auto iter = sketch.begin();
359
+ for (const auto& key: compact_sketch) {
360
+ REQUIRE(*iter == key);
361
+ ++iter;
362
+ }
363
+ }
364
+
201
365
  TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[theta_sketch]") {
202
366
  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
203
367
  const int n = 8192;
@@ -238,4 +402,276 @@ TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[thet
238
402
  REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
239
403
  }
240
404
 
405
+ TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sketch]") {
406
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
407
+ const int n = 8192;
408
+ for (int i = 0; i < n; i++) update_sketch.update(i);
409
+
410
+ // unordered
411
+ auto unordered_compact1 = update_sketch.compact(false);
412
+ compact_theta_sketch unordered_compact2(update_sketch, false);
413
+ auto it = unordered_compact1.begin();
414
+ for (auto entry: unordered_compact2) {
415
+ REQUIRE(*it == entry);
416
+ ++it;
417
+ }
418
+
419
+ // ordered
420
+ auto ordered_compact1 = update_sketch.compact();
421
+ compact_theta_sketch ordered_compact2(update_sketch, true);
422
+ it = ordered_compact1.begin();
423
+ for (auto entry: ordered_compact2) {
424
+ REQUIRE(*it == entry);
425
+ ++it;
426
+ }
427
+
428
+ // wrapped compact
429
+ auto bytes = ordered_compact1.serialize();
430
+ auto ordered_compact3 = wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size());
431
+ it = ordered_compact1.begin();
432
+ for (auto entry: ordered_compact3) {
433
+ REQUIRE(*it == entry);
434
+ ++it;
435
+ }
436
+ REQUIRE(ordered_compact3.get_estimate() == ordered_compact1.get_estimate());
437
+ REQUIRE(ordered_compact3.get_lower_bound(1) == ordered_compact1.get_lower_bound(1));
438
+ REQUIRE(ordered_compact3.get_upper_bound(1) == ordered_compact1.get_upper_bound(1));
439
+ REQUIRE(ordered_compact3.is_estimation_mode() == ordered_compact1.is_estimation_mode());
440
+ REQUIRE(ordered_compact3.get_theta() == ordered_compact1.get_theta());
441
+
442
+
443
+ // seed mismatch
444
+ REQUIRE_THROWS_AS(wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size(), 0), std::invalid_argument);
445
+ }
446
+
447
+ TEST_CASE("theta sketch: wrap compact empty from java", "[theta_sketch]") {
448
+ std::ifstream is;
449
+ is.exceptions(std::ios::failbit | std::ios::badbit);
450
+ is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary | std::ios::ate);
451
+
452
+ std::vector<uint8_t> buf;
453
+ if(is) {
454
+ auto size = is.tellg();
455
+ buf.reserve(size);
456
+ buf.assign(size, 0);
457
+ is.seekg(0, std::ios_base::beg);
458
+ is.read((char*)(buf.data()), buf.size());
459
+ }
460
+
461
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
462
+ REQUIRE(sketch.is_empty());
463
+ REQUIRE_FALSE(sketch.is_estimation_mode());
464
+ REQUIRE(sketch.get_num_retained() == 0);
465
+ REQUIRE(sketch.get_theta() == 1.0);
466
+ REQUIRE(sketch.get_estimate() == 0.0);
467
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
468
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
469
+ }
470
+
471
+ TEST_CASE("theta sketch: wrap compact v1 empty from java", "[theta_sketch]") {
472
+ std::ifstream is;
473
+ is.exceptions(std::ios::failbit | std::ios::badbit);
474
+ is.open(inputPath + "theta_compact_empty_from_java_v1.sk", std::ios::binary | std::ios::ate);
475
+
476
+ std::vector<uint8_t> buf;
477
+ if(is) {
478
+ auto size = is.tellg();
479
+ buf.reserve(size);
480
+ buf.assign(size, 0);
481
+ is.seekg(0, std::ios_base::beg);
482
+ is.read((char*)(buf.data()), buf.size());
483
+ }
484
+
485
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
486
+ REQUIRE(sketch.is_empty());
487
+ REQUIRE_FALSE(sketch.is_estimation_mode());
488
+ REQUIRE(sketch.get_num_retained() == 0);
489
+ REQUIRE(sketch.get_theta() == 1.0);
490
+ REQUIRE(sketch.get_estimate() == 0.0);
491
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
492
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
493
+ }
494
+
495
+ TEST_CASE("theta sketch: wrap compact v2 empty from java", "[theta_sketch]") {
496
+ std::ifstream is;
497
+ is.exceptions(std::ios::failbit | std::ios::badbit);
498
+ is.open(inputPath + "theta_compact_empty_from_java_v2.sk", std::ios::binary | std::ios::ate);
499
+
500
+ std::vector<uint8_t> buf;
501
+ if(is) {
502
+ auto size = is.tellg();
503
+ buf.reserve(size);
504
+ buf.assign(size, 0);
505
+ is.seekg(0, std::ios_base::beg);
506
+ is.read((char*)(buf.data()), buf.size());
507
+ }
508
+
509
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
510
+ REQUIRE(sketch.is_empty());
511
+ REQUIRE_FALSE(sketch.is_estimation_mode());
512
+ REQUIRE(sketch.get_num_retained() == 0);
513
+ REQUIRE(sketch.get_theta() == 1.0);
514
+ REQUIRE(sketch.get_estimate() == 0.0);
515
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
516
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
517
+ }
518
+
519
+ TEST_CASE("theta sketch: wrap single item from java", "[theta_sketch]") {
520
+ std::ifstream is;
521
+ is.exceptions(std::ios::failbit | std::ios::badbit);
522
+ is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary | std::ios::ate);
523
+ std::vector<uint8_t> buf;
524
+ if(is) {
525
+ auto size = is.tellg();
526
+ buf.reserve(size);
527
+ buf.assign(size, 0);
528
+ is.seekg(0, std::ios_base::beg);
529
+ is.read((char*)(buf.data()), buf.size());
530
+ }
531
+
532
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
533
+ REQUIRE_FALSE(sketch.is_empty());
534
+ REQUIRE_FALSE(sketch.is_estimation_mode());
535
+ REQUIRE(sketch.get_num_retained() == 1);
536
+ REQUIRE(sketch.get_theta() == 1.0);
537
+ REQUIRE(sketch.get_estimate() == 1.0);
538
+ REQUIRE(sketch.get_lower_bound(1) == 1.0);
539
+ REQUIRE(sketch.get_upper_bound(1) == 1.0);
540
+ }
541
+
542
+ TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
543
+ std::ifstream is;
544
+ is.exceptions(std::ios::failbit | std::ios::badbit);
545
+ is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary | std::ios::ate);
546
+ std::vector<uint8_t> buf;
547
+ if(is) {
548
+ auto size = is.tellg();
549
+ buf.reserve(size);
550
+ buf.assign(size, 0);
551
+ is.seekg(0, std::ios_base::beg);
552
+ is.read((char*)(buf.data()), buf.size());
553
+ }
554
+
555
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
556
+ REQUIRE_FALSE(sketch.is_empty());
557
+ REQUIRE(sketch.is_estimation_mode());
558
+ REQUIRE(sketch.is_ordered());
559
+ REQUIRE(sketch.get_num_retained() == 4342);
560
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
561
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
562
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
563
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
564
+
565
+ // the same construction process in Java must have produced exactly the same sketch
566
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
567
+ const int n = 8192;
568
+ for (int i = 0; i < n; i++) update_sketch.update(i);
569
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
570
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
571
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
572
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
573
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
574
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
575
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
576
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
577
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
578
+ compact_theta_sketch compact_sketch = update_sketch.compact();
579
+ // the sketches are ordered, so the iteration sequence must match exactly
580
+ auto iter = sketch.begin();
581
+ for (const auto& key: compact_sketch) {
582
+ REQUIRE(*iter == key);
583
+ ++iter;
584
+ }
585
+ }
586
+
587
+ TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]") {
588
+ std::ifstream is;
589
+ is.exceptions(std::ios::failbit | std::ios::badbit);
590
+ is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary | std::ios::ate);
591
+ std::vector<uint8_t> buf;
592
+ if(is) {
593
+ auto size = is.tellg();
594
+ buf.reserve(size);
595
+ buf.assign(size, 0);
596
+ is.seekg(0, std::ios_base::beg);
597
+ is.read((char*)(buf.data()), buf.size());
598
+ }
599
+
600
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
601
+ REQUIRE_FALSE(sketch.is_empty());
602
+ REQUIRE(sketch.is_estimation_mode());
603
+ // REQUIRE(sketch.is_ordered()); // v1 may not be ordered
604
+ REQUIRE(sketch.get_num_retained() == 4342);
605
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
606
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
607
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
608
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
609
+
610
+ // the same construction process in Java must have produced exactly the same sketch
611
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
612
+ const int n = 8192;
613
+ for (int i = 0; i < n; i++) update_sketch.update(i);
614
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
615
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
616
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
617
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
618
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
619
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
620
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
621
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
622
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
623
+ compact_theta_sketch compact_sketch = update_sketch.compact();
624
+ // the sketches are ordered, so the iteration sequence must match exactly
625
+ auto iter = sketch.begin();
626
+ for (const auto& key: compact_sketch) {
627
+ REQUIRE(*iter == key);
628
+ ++iter;
629
+ }
630
+ }
631
+
632
+ TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]") {
633
+ std::ifstream is;
634
+ is.exceptions(std::ios::failbit | std::ios::badbit);
635
+ is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary | std::ios::ate);
636
+ std::vector<uint8_t> buf;
637
+ if(is) {
638
+ auto size = is.tellg();
639
+ buf.reserve(size);
640
+ buf.assign(size, 0);
641
+ is.seekg(0, std::ios_base::beg);
642
+ is.read((char*)(buf.data()), buf.size());
643
+ }
644
+
645
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
646
+ REQUIRE_FALSE(sketch.is_empty());
647
+ REQUIRE(sketch.is_estimation_mode());
648
+ // REQUIRE(sketch.is_ordered()); // v2 may not be ordered
649
+ REQUIRE(sketch.get_num_retained() == 4342);
650
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
651
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
652
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
653
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
654
+
655
+ // the same construction process in Java must have produced exactly the same sketch
656
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
657
+ const int n = 8192;
658
+ for (int i = 0; i < n; i++) update_sketch.update(i);
659
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
660
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
661
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
662
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
663
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
664
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
665
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
666
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
667
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
668
+ compact_theta_sketch compact_sketch = update_sketch.compact();
669
+ // the sketches are ordered, so the iteration sequence must match exactly
670
+ auto iter = sketch.begin();
671
+ for (const auto& key: compact_sketch) {
672
+ REQUIRE(*iter == key);
673
+ ++iter;
674
+ }
675
+ }
676
+
241
677
  } /* namespace datasketches */
@@ -39,7 +39,7 @@ TEST_CASE("theta union: empty", "[theta_union]") {
39
39
  }
40
40
 
41
41
  TEST_CASE("theta union: non empty no retained keys", "[theta_union]") {
42
- update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001).build();
42
+ update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001f).build();
43
43
  update_sketch.update(1);
44
44
  theta_union u = theta_union::builder().build();
45
45
  u.update(update_sketch);
@@ -51,40 +51,72 @@ TEST_CASE("theta union: non empty no retained keys", "[theta_union]") {
51
51
  }
52
52
 
53
53
  TEST_CASE("theta union: exact mode half overlap", "[theta_union]") {
54
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
54
+ auto sketch1 = update_theta_sketch::builder().build();
55
55
  int value = 0;
56
56
  for (int i = 0; i < 1000; i++) sketch1.update(value++);
57
57
 
58
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
58
+ auto sketch2 = update_theta_sketch::builder().build();
59
59
  value = 500;
60
60
  for (int i = 0; i < 1000; i++) sketch2.update(value++);
61
61
 
62
- theta_union u = theta_union::builder().build();
62
+ auto u = theta_union::builder().build();
63
63
  u.update(sketch1);
64
64
  u.update(sketch2);
65
+ auto sketch3 = u.get_result();
66
+ REQUIRE_FALSE(sketch3.is_empty());
67
+ REQUIRE_FALSE(sketch3.is_estimation_mode());
68
+ REQUIRE(sketch3.get_estimate() == 1500.0);
69
+
70
+ u.reset();
71
+ sketch3 = u.get_result();
72
+ REQUIRE(sketch3.get_num_retained() == 0);
73
+ REQUIRE(sketch3.is_empty());
74
+ REQUIRE_FALSE(sketch3.is_estimation_mode());
75
+ }
76
+
77
+ TEST_CASE("theta union: exact mode half overlap wrapped compact", "[theta_union]") {
78
+ auto sketch1 = update_theta_sketch::builder().build();
79
+ int value = 0;
80
+ for (int i = 0; i < 1000; i++) sketch1.update(value++);
81
+ auto bytes1 = sketch1.compact().serialize();
82
+
83
+ auto sketch2 = update_theta_sketch::builder().build();
84
+ value = 500;
85
+ for (int i = 0; i < 1000; i++) sketch2.update(value++);
86
+ auto bytes2 = sketch2.compact().serialize();
87
+
88
+ auto u = theta_union::builder().build();
89
+ u.update(wrapped_compact_theta_sketch::wrap(bytes1.data(), bytes1.size()));
90
+ u.update(wrapped_compact_theta_sketch::wrap(bytes2.data(), bytes2.size()));
65
91
  compact_theta_sketch sketch3 = u.get_result();
66
92
  REQUIRE_FALSE(sketch3.is_empty());
67
93
  REQUIRE_FALSE(sketch3.is_estimation_mode());
68
- REQUIRE(sketch3.get_estimate() == Approx(1500).margin(1500 * 0.01));
94
+ REQUIRE(sketch3.get_estimate() == 1500.0);
69
95
  }
70
96
 
71
97
  TEST_CASE("theta union: estimation mode half overlap", "[theta_union]") {
72
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
98
+ auto sketch1 = update_theta_sketch::builder().build();
73
99
  int value = 0;
74
100
  for (int i = 0; i < 10000; i++) sketch1.update(value++);
75
101
 
76
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
102
+ auto sketch2 = update_theta_sketch::builder().build();
77
103
  value = 5000;
78
104
  for (int i = 0; i < 10000; i++) sketch2.update(value++);
79
105
 
80
- theta_union u = theta_union::builder().build();
106
+ auto u = theta_union::builder().build();
81
107
  u.update(sketch1);
82
108
  u.update(sketch2);
83
- compact_theta_sketch sketch3 = u.get_result();
109
+ auto sketch3 = u.get_result();
84
110
  REQUIRE_FALSE(sketch3.is_empty());
85
111
  REQUIRE(sketch3.is_estimation_mode());
86
112
  REQUIRE(sketch3.get_estimate() == Approx(15000).margin(15000 * 0.01));
87
113
  //std::cerr << sketch3.to_string(true);
114
+
115
+ u.reset();
116
+ sketch3 = u.get_result();
117
+ REQUIRE(sketch3.get_num_retained() == 0);
118
+ REQUIRE(sketch3.is_empty());
119
+ REQUIRE_FALSE(sketch3.is_estimation_mode());
88
120
  }
89
121
 
90
122
  TEST_CASE("theta union: seed mismatch", "[theta_union]") {
@@ -32,41 +32,26 @@ target_include_directories(tuple
32
32
  target_link_libraries(tuple INTERFACE common theta)
33
33
  target_compile_features(tuple INTERFACE cxx_std_11)
34
34
 
35
- set(tuple_HEADERS "")
36
- list(APPEND tuple_HEADERS "include/tuple_sketch.hpp;include/tuple_sketch_impl.hpp")
37
- list(APPEND tuple_HEADERS "include/tuple_union.hpp;include/tuple_union_impl.hpp")
38
- list(APPEND tuple_HEADERS "include/tuple_intersection.hpp;include/tuple_intersection_impl.hpp")
39
- list(APPEND tuple_HEADERS "include/tuple_a_not_b.hpp;include/tuple_a_not_b_impl.hpp")
40
- list(APPEND tuple_HEADERS "include/tuple_jaccard_similarity.hpp")
41
- list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
42
- list(APPEND tuple_HEADERS "include/array_of_doubles_union.hpp;include/array_of_doubles_union_impl.hpp")
43
- list(APPEND tuple_HEADERS "include/array_of_doubles_intersection.hpp;include/array_of_doubles_intersection_impl.hpp")
44
- list(APPEND tuple_HEADERS "include/array_of_doubles_a_not_b.hpp;include/array_of_doubles_a_not_b_impl.hpp")
45
-
46
35
  install(TARGETS tuple
47
36
  EXPORT ${PROJECT_NAME}
48
37
  )
49
38
 
50
- install(FILES ${tuple_HEADERS}
39
+ install(FILES
40
+ include/tuple_sketch.hpp
41
+ include/tuple_sketch_impl.hpp
42
+ include/tuple_union.hpp
43
+ include/tuple_union_impl.hpp
44
+ include/tuple_intersection.hpp
45
+ include/tuple_intersection_impl.hpp
46
+ include/tuple_a_not_b.hpp
47
+ include/tuple_a_not_b_impl.hpp
48
+ include/tuple_jaccard_similarity.hpp
49
+ include/array_of_doubles_sketch.hpp
50
+ include/array_of_doubles_sketch_impl.hpp
51
+ include/array_of_doubles_union.hpp
52
+ include/array_of_doubles_union_impl.hpp
53
+ include/array_of_doubles_intersection.hpp
54
+ include/array_of_doubles_intersection_impl.hpp
55
+ include/array_of_doubles_a_not_b.hpp
56
+ include/array_of_doubles_a_not_b_impl.hpp
51
57
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
52
-
53
- target_sources(tuple
54
- INTERFACE
55
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch.hpp
56
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch_impl.hpp
57
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union.hpp
58
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union_impl.hpp
59
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection.hpp
60
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection_impl.hpp
61
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b.hpp
62
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b_impl.hpp
63
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_jaccard_similarity.hpp
64
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
65
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
66
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union.hpp
67
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union_impl.hpp
68
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection.hpp
69
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection_impl.hpp
70
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b.hpp
71
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b_impl.hpp
72
- )
@@ -122,7 +122,7 @@ public:
122
122
 
123
123
  private:
124
124
  // for builder
125
- update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
125
+ update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta,
126
126
  uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator);
127
127
  };
128
128