datasketches 0.2.3 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/README.md +8 -8
  4. data/ext/datasketches/kll_wrapper.cpp +7 -3
  5. data/ext/datasketches/theta_wrapper.cpp +20 -4
  6. data/lib/datasketches/version.rb +1 -1
  7. data/vendor/datasketches-cpp/CMakeLists.txt +25 -5
  8. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  9. data/vendor/datasketches-cpp/NOTICE +6 -5
  10. data/vendor/datasketches-cpp/README.md +76 -9
  11. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  12. data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
  13. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  14. data/vendor/datasketches-cpp/common/include/common_defs.hpp +14 -0
  15. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  16. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  17. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  18. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  19. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  20. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +3 -1
  22. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  24. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
  25. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  26. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  27. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  28. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  29. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  30. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +29 -11
  31. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  32. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  33. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  34. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  35. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  36. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  37. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  38. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  39. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  40. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  41. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  42. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  43. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  44. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  45. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  46. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  47. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  48. data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
  49. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  50. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +5 -2
  51. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +108 -41
  52. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +150 -132
  53. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +165 -31
  54. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  55. data/vendor/datasketches-cpp/pyproject.toml +1 -1
  56. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  57. data/vendor/datasketches-cpp/python/README.md +13 -9
  58. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  59. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  60. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +48 -13
  61. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  62. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  63. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  64. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +1 -0
  65. data/vendor/datasketches-cpp/python/tests/kll_test.py +10 -4
  66. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  67. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  68. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +656 -0
  69. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1373 -0
  70. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  71. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  72. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  73. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  74. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  75. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  76. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  77. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  78. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  79. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  80. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  81. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +975 -0
  82. data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
  83. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  84. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +6 -0
  85. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +30 -2
  86. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +73 -23
  87. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +95 -63
  88. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +74 -3
  89. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  90. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +44 -7
  91. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +44 -33
  92. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
  94. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  95. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  96. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  97. data/vendor/datasketches-cpp/setup.py +1 -1
  98. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  99. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  100. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
  101. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  102. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
  103. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
  104. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  105. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
  106. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  107. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  108. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
  109. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  110. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -4
  111. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +34 -9
  112. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  113. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  114. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  115. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  116. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  117. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  118. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  119. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  120. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
  121. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
  122. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
  123. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  124. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  125. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  126. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  127. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  128. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
  129. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
  130. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  131. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  132. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  133. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  134. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  135. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  136. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
  137. metadata +33 -12
  138. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  139. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  140. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  141. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
  142. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -17,8 +17,11 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
+ #include <istream>
20
21
  #include <fstream>
21
22
  #include <sstream>
23
+ #include <vector>
24
+ #include <stdexcept>
22
25
 
23
26
  #include <catch.hpp>
24
27
  #include <theta_sketch.hpp>
@@ -39,6 +42,7 @@ TEST_CASE("theta sketch: empty", "[theta_sketch]") {
39
42
  REQUIRE(update_sketch.get_estimate() == 0.0);
40
43
  REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
41
44
  REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
45
+ REQUIRE(update_sketch.is_ordered());
42
46
 
43
47
  compact_theta_sketch compact_sketch = update_sketch.compact();
44
48
  REQUIRE(compact_sketch.is_empty());
@@ -47,6 +51,10 @@ TEST_CASE("theta sketch: empty", "[theta_sketch]") {
47
51
  REQUIRE(compact_sketch.get_estimate() == 0.0);
48
52
  REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
49
53
  REQUIRE(compact_sketch.get_upper_bound(1) == 0.0);
54
+ REQUIRE(compact_sketch.is_ordered());
55
+
56
+ // empty is forced to be ordered
57
+ REQUIRE(update_sketch.compact(false).is_ordered());
50
58
  }
51
59
 
52
60
  TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
@@ -67,6 +75,14 @@ TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
67
75
  REQUIRE(compact_sketch.get_estimate() == 0.0);
68
76
  REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
69
77
  REQUIRE(compact_sketch.get_upper_bound(1) > 0);
78
+
79
+ update_sketch.reset();
80
+ REQUIRE(update_sketch.is_empty());
81
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
82
+ REQUIRE(update_sketch.get_theta() == 1.0);
83
+ REQUIRE(update_sketch.get_estimate() == 0.0);
84
+ REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
85
+ REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
70
86
  }
71
87
 
72
88
  TEST_CASE("theta sketch: single item", "[theta_sketch]") {
@@ -78,6 +94,7 @@ TEST_CASE("theta sketch: single item", "[theta_sketch]") {
78
94
  REQUIRE(update_sketch.get_estimate() == 1.0);
79
95
  REQUIRE(update_sketch.get_lower_bound(1) == 1.0);
80
96
  REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
97
+ REQUIRE(update_sketch.is_ordered()); // one item is ordered
81
98
 
82
99
  compact_theta_sketch compact_sketch = update_sketch.compact();
83
100
  REQUIRE_FALSE(compact_sketch.is_empty());
@@ -86,6 +103,10 @@ TEST_CASE("theta sketch: single item", "[theta_sketch]") {
86
103
  REQUIRE(compact_sketch.get_estimate() == 1.0);
87
104
  REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
88
105
  REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
106
+ REQUIRE(compact_sketch.is_ordered());
107
+
108
+ // single item is forced to be ordered
109
+ REQUIRE(update_sketch.compact(false).is_ordered());
89
110
  }
90
111
 
91
112
  TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
@@ -97,6 +118,7 @@ TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
97
118
  REQUIRE(update_sketch.get_estimate() == 2000.0);
98
119
  REQUIRE(update_sketch.get_lower_bound(1) == 2000.0);
99
120
  REQUIRE(update_sketch.get_upper_bound(1) == 2000.0);
121
+ REQUIRE_FALSE(update_sketch.is_ordered());
100
122
 
101
123
  compact_theta_sketch compact_sketch = update_sketch.compact();
102
124
  REQUIRE_FALSE(compact_sketch.is_empty());
@@ -105,6 +127,17 @@ TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
105
127
  REQUIRE(compact_sketch.get_estimate() == 2000.0);
106
128
  REQUIRE(compact_sketch.get_lower_bound(1) == 2000.0);
107
129
  REQUIRE(compact_sketch.get_upper_bound(1) == 2000.0);
130
+ REQUIRE(compact_sketch.is_ordered());
131
+
132
+ update_sketch.reset();
133
+ REQUIRE(update_sketch.is_empty());
134
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
135
+ REQUIRE(update_sketch.get_theta() == 1.0);
136
+ REQUIRE(update_sketch.get_estimate() == 0.0);
137
+ REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
138
+ REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
139
+ REQUIRE(update_sketch.is_ordered());
140
+
108
141
  }
109
142
 
110
143
  TEST_CASE("theta sketch: estimation", "[theta_sketch]") {
@@ -148,6 +181,34 @@ TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]")
148
181
  REQUIRE(sketch.get_upper_bound(1) == 0.0);
149
182
  }
150
183
 
184
+ TEST_CASE("theta sketch: deserialize compact v1 empty from java", "[theta_sketch]") {
185
+ std::ifstream is;
186
+ is.exceptions(std::ios::failbit | std::ios::badbit);
187
+ is.open(inputPath + "theta_compact_empty_from_java_v1.sk", std::ios::binary);
188
+ auto sketch = compact_theta_sketch::deserialize(is);
189
+ REQUIRE(sketch.is_empty());
190
+ REQUIRE_FALSE(sketch.is_estimation_mode());
191
+ REQUIRE(sketch.get_num_retained() == 0);
192
+ REQUIRE(sketch.get_theta() == 1.0);
193
+ REQUIRE(sketch.get_estimate() == 0.0);
194
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
195
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
196
+ }
197
+
198
+ TEST_CASE("theta sketch: deserialize compact v2 empty from java", "[theta_sketch]") {
199
+ std::ifstream is;
200
+ is.exceptions(std::ios::failbit | std::ios::badbit);
201
+ is.open(inputPath + "theta_compact_empty_from_java_v2.sk", std::ios::binary);
202
+ auto sketch = compact_theta_sketch::deserialize(is);
203
+ REQUIRE(sketch.is_empty());
204
+ REQUIRE_FALSE(sketch.is_estimation_mode());
205
+ REQUIRE(sketch.get_num_retained() == 0);
206
+ REQUIRE(sketch.get_theta() == 1.0);
207
+ REQUIRE(sketch.get_estimate() == 0.0);
208
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
209
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
210
+ }
211
+
151
212
  TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
152
213
  std::ifstream is;
153
214
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -162,6 +223,38 @@ TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
162
223
  REQUIRE(sketch.get_upper_bound(1) == 1.0);
163
224
  }
164
225
 
226
+ TEST_CASE("theta sketch: deserialize compact exact from java", "[theta_sketch]") {
227
+ std::ifstream is;
228
+ is.exceptions(std::ios::failbit | std::ios::badbit);
229
+ is.open(inputPath + "theta_compact_exact_from_java.sk", std::ios::binary);
230
+ auto sketch = compact_theta_sketch::deserialize(is);
231
+ REQUIRE_FALSE(sketch.is_empty());
232
+ REQUIRE_FALSE(sketch.is_estimation_mode());
233
+ REQUIRE(sketch.is_ordered());
234
+ REQUIRE(sketch.get_num_retained() == 100);
235
+
236
+ // the same construction process in Java must have produced exactly the same sketch
237
+ auto update_sketch = update_theta_sketch::builder().build();
238
+ const int n = 100;
239
+ for (int i = 0; i < n; i++) update_sketch.update(i);
240
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
241
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
242
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
243
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
244
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
245
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
246
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
247
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
248
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
249
+ compact_theta_sketch compact_sketch = update_sketch.compact();
250
+ // the sketches are ordered, so the iteration sequence must match exactly
251
+ auto iter = sketch.begin();
252
+ for (const auto& key: compact_sketch) {
253
+ REQUIRE(*iter == key);
254
+ ++iter;
255
+ }
256
+ }
257
+
165
258
  TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") {
166
259
  std::ifstream is;
167
260
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -198,6 +291,78 @@ TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sket
198
291
  }
199
292
  }
200
293
 
294
+ TEST_CASE("theta sketch: deserialize compact v1 estimation from java", "[theta_sketch]") {
295
+ std::ifstream is;
296
+ is.exceptions(std::ios::failbit | std::ios::badbit);
297
+ is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary);
298
+ auto sketch = compact_theta_sketch::deserialize(is);
299
+ REQUIRE_FALSE(sketch.is_empty());
300
+ REQUIRE(sketch.is_estimation_mode());
301
+ REQUIRE(sketch.is_ordered());
302
+ REQUIRE(sketch.get_num_retained() == 4342);
303
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
304
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
305
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
306
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
307
+
308
+ // the same construction process in Java must have produced exactly the same sketch
309
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
310
+ const int n = 8192;
311
+ for (int i = 0; i < n; i++) update_sketch.update(i);
312
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
313
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
314
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
315
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
316
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
317
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
318
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
319
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
320
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
321
+ compact_theta_sketch compact_sketch = update_sketch.compact();
322
+ // the sketches are ordered, so the iteration sequence must match exactly
323
+ auto iter = sketch.begin();
324
+ for (const auto& key: compact_sketch) {
325
+ REQUIRE(*iter == key);
326
+ ++iter;
327
+ }
328
+ }
329
+
330
+ TEST_CASE("theta sketch: deserialize compact v2 estimation from java", "[theta_sketch]") {
331
+ std::ifstream is;
332
+ is.exceptions(std::ios::failbit | std::ios::badbit);
333
+ is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary);
334
+ auto sketch = compact_theta_sketch::deserialize(is);
335
+ REQUIRE_FALSE(sketch.is_empty());
336
+ REQUIRE(sketch.is_estimation_mode());
337
+ REQUIRE(sketch.is_ordered());
338
+ REQUIRE(sketch.get_num_retained() == 4342);
339
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
340
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
341
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
342
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
343
+
344
+ // the same construction process in Java must have produced exactly the same sketch
345
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
346
+ const int n = 8192;
347
+ for (int i = 0; i < n; i++) update_sketch.update(i);
348
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
349
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
350
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
351
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
352
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
353
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
354
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
355
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
356
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
357
+ compact_theta_sketch compact_sketch = update_sketch.compact();
358
+ // the sketches are ordered, so the iteration sequence must match exactly
359
+ auto iter = sketch.begin();
360
+ for (const auto& key: compact_sketch) {
361
+ REQUIRE(*iter == key);
362
+ ++iter;
363
+ }
364
+ }
365
+
201
366
  TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[theta_sketch]") {
202
367
  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
203
368
  const int n = 8192;
@@ -230,7 +395,13 @@ TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[
230
395
  }
231
396
  }
232
397
 
233
- TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[theta_sketch]") {
398
+ TEST_CASE("theta sketch: deserialize empty buffer overrun", "[theta_sketch]") {
399
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
400
+ auto bytes = update_sketch.compact().serialize();
401
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
402
+ }
403
+
404
+ TEST_CASE("theta sketch: deserialize single item buffer overrun", "[theta_sketch]") {
234
405
  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
235
406
  update_sketch.update(1);
236
407
  auto bytes = update_sketch.compact().serialize();
@@ -238,6 +409,27 @@ TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[thet
238
409
  REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
239
410
  }
240
411
 
412
+ TEST_CASE("theta sketch: deserialize exact mode buffer overrun", "[theta_sketch]") {
413
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
414
+ for (int i = 0; i < 1000; ++i) update_sketch.update(i);
415
+ auto bytes = update_sketch.compact().serialize();
416
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
417
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 8), std::out_of_range);
418
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 16), std::out_of_range);
419
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
420
+ }
421
+
422
+ TEST_CASE("theta sketch: deserialize estimation mode buffer overrun", "[theta_sketch]") {
423
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
424
+ for (int i = 0; i < 10000; ++i) update_sketch.update(i);
425
+ auto bytes = update_sketch.compact().serialize();
426
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
427
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 8), std::out_of_range);
428
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 16), std::out_of_range);
429
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 24), std::out_of_range);
430
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
431
+ }
432
+
241
433
  TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sketch]") {
242
434
  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
243
435
  const int n = 8192;
@@ -269,9 +461,245 @@ TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sk
269
461
  REQUIRE(*it == entry);
270
462
  ++it;
271
463
  }
464
+ REQUIRE(ordered_compact3.get_estimate() == ordered_compact1.get_estimate());
465
+ REQUIRE(ordered_compact3.get_lower_bound(1) == ordered_compact1.get_lower_bound(1));
466
+ REQUIRE(ordered_compact3.get_upper_bound(1) == ordered_compact1.get_upper_bound(1));
467
+ REQUIRE(ordered_compact3.is_estimation_mode() == ordered_compact1.is_estimation_mode());
468
+ REQUIRE(ordered_compact3.get_theta() == ordered_compact1.get_theta());
469
+
272
470
 
273
471
  // seed mismatch
274
472
  REQUIRE_THROWS_AS(wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size(), 0), std::invalid_argument);
275
473
  }
276
474
 
475
+ TEST_CASE("theta sketch: wrap compact empty from java", "[theta_sketch]") {
476
+ std::ifstream is;
477
+ is.exceptions(std::ios::failbit | std::ios::badbit);
478
+ is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary | std::ios::ate);
479
+
480
+ std::vector<uint8_t> buf;
481
+ if(is) {
482
+ auto size = is.tellg();
483
+ buf.reserve(size);
484
+ buf.assign(size, 0);
485
+ is.seekg(0, std::ios_base::beg);
486
+ is.read((char*)(buf.data()), buf.size());
487
+ }
488
+
489
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
490
+ REQUIRE(sketch.is_empty());
491
+ REQUIRE_FALSE(sketch.is_estimation_mode());
492
+ REQUIRE(sketch.get_num_retained() == 0);
493
+ REQUIRE(sketch.get_theta() == 1.0);
494
+ REQUIRE(sketch.get_estimate() == 0.0);
495
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
496
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
497
+ }
498
+
499
+ TEST_CASE("theta sketch: wrap compact v1 empty from java", "[theta_sketch]") {
500
+ std::ifstream is;
501
+ is.exceptions(std::ios::failbit | std::ios::badbit);
502
+ is.open(inputPath + "theta_compact_empty_from_java_v1.sk", std::ios::binary | std::ios::ate);
503
+
504
+ std::vector<uint8_t> buf;
505
+ if(is) {
506
+ auto size = is.tellg();
507
+ buf.reserve(size);
508
+ buf.assign(size, 0);
509
+ is.seekg(0, std::ios_base::beg);
510
+ is.read((char*)(buf.data()), buf.size());
511
+ }
512
+
513
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
514
+ REQUIRE(sketch.is_empty());
515
+ REQUIRE_FALSE(sketch.is_estimation_mode());
516
+ REQUIRE(sketch.get_num_retained() == 0);
517
+ REQUIRE(sketch.get_theta() == 1.0);
518
+ REQUIRE(sketch.get_estimate() == 0.0);
519
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
520
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
521
+ }
522
+
523
+ TEST_CASE("theta sketch: wrap compact v2 empty from java", "[theta_sketch]") {
524
+ std::ifstream is;
525
+ is.exceptions(std::ios::failbit | std::ios::badbit);
526
+ is.open(inputPath + "theta_compact_empty_from_java_v2.sk", std::ios::binary | std::ios::ate);
527
+
528
+ std::vector<uint8_t> buf;
529
+ if(is) {
530
+ auto size = is.tellg();
531
+ buf.reserve(size);
532
+ buf.assign(size, 0);
533
+ is.seekg(0, std::ios_base::beg);
534
+ is.read((char*)(buf.data()), buf.size());
535
+ }
536
+
537
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
538
+ REQUIRE(sketch.is_empty());
539
+ REQUIRE_FALSE(sketch.is_estimation_mode());
540
+ REQUIRE(sketch.get_num_retained() == 0);
541
+ REQUIRE(sketch.get_theta() == 1.0);
542
+ REQUIRE(sketch.get_estimate() == 0.0);
543
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
544
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
545
+ }
546
+
547
+ TEST_CASE("theta sketch: wrap single item from java", "[theta_sketch]") {
548
+ std::ifstream is;
549
+ is.exceptions(std::ios::failbit | std::ios::badbit);
550
+ is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary | std::ios::ate);
551
+ std::vector<uint8_t> buf;
552
+ if(is) {
553
+ auto size = is.tellg();
554
+ buf.reserve(size);
555
+ buf.assign(size, 0);
556
+ is.seekg(0, std::ios_base::beg);
557
+ is.read((char*)(buf.data()), buf.size());
558
+ }
559
+
560
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
561
+ REQUIRE_FALSE(sketch.is_empty());
562
+ REQUIRE_FALSE(sketch.is_estimation_mode());
563
+ REQUIRE(sketch.get_num_retained() == 1);
564
+ REQUIRE(sketch.get_theta() == 1.0);
565
+ REQUIRE(sketch.get_estimate() == 1.0);
566
+ REQUIRE(sketch.get_lower_bound(1) == 1.0);
567
+ REQUIRE(sketch.get_upper_bound(1) == 1.0);
568
+ }
569
+
570
+ TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
571
+ std::ifstream is;
572
+ is.exceptions(std::ios::failbit | std::ios::badbit);
573
+ is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary | std::ios::ate);
574
+ std::vector<uint8_t> buf;
575
+ if(is) {
576
+ auto size = is.tellg();
577
+ buf.reserve(size);
578
+ buf.assign(size, 0);
579
+ is.seekg(0, std::ios_base::beg);
580
+ is.read((char*)(buf.data()), buf.size());
581
+ }
582
+
583
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
584
+ REQUIRE_FALSE(sketch.is_empty());
585
+ REQUIRE(sketch.is_estimation_mode());
586
+ REQUIRE(sketch.is_ordered());
587
+ REQUIRE(sketch.get_num_retained() == 4342);
588
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
589
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
590
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
591
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
592
+
593
+ // the same construction process in Java must have produced exactly the same sketch
594
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
595
+ const int n = 8192;
596
+ for (int i = 0; i < n; i++) update_sketch.update(i);
597
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
598
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
599
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
600
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
601
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
602
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
603
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
604
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
605
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
606
+ compact_theta_sketch compact_sketch = update_sketch.compact();
607
+ // the sketches are ordered, so the iteration sequence must match exactly
608
+ auto iter = sketch.begin();
609
+ for (const auto& key: compact_sketch) {
610
+ REQUIRE(*iter == key);
611
+ ++iter;
612
+ }
613
+ }
614
+
615
+ TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]") {
616
+ std::ifstream is;
617
+ is.exceptions(std::ios::failbit | std::ios::badbit);
618
+ is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary | std::ios::ate);
619
+ std::vector<uint8_t> buf;
620
+ if(is) {
621
+ auto size = is.tellg();
622
+ buf.reserve(size);
623
+ buf.assign(size, 0);
624
+ is.seekg(0, std::ios_base::beg);
625
+ is.read((char*)(buf.data()), buf.size());
626
+ }
627
+
628
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
629
+ REQUIRE_FALSE(sketch.is_empty());
630
+ REQUIRE(sketch.is_estimation_mode());
631
+ // REQUIRE(sketch.is_ordered()); // v1 may not be ordered
632
+ REQUIRE(sketch.get_num_retained() == 4342);
633
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
634
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
635
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
636
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
637
+
638
+ // the same construction process in Java must have produced exactly the same sketch
639
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
640
+ const int n = 8192;
641
+ for (int i = 0; i < n; i++) update_sketch.update(i);
642
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
643
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
644
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
645
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
646
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
647
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
648
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
649
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
650
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
651
+ compact_theta_sketch compact_sketch = update_sketch.compact();
652
+ // the sketches are ordered, so the iteration sequence must match exactly
653
+ auto iter = sketch.begin();
654
+ for (const auto& key: compact_sketch) {
655
+ REQUIRE(*iter == key);
656
+ ++iter;
657
+ }
658
+ }
659
+
660
+ TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]") {
661
+ std::ifstream is;
662
+ is.exceptions(std::ios::failbit | std::ios::badbit);
663
+ is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary | std::ios::ate);
664
+ std::vector<uint8_t> buf;
665
+ if(is) {
666
+ auto size = is.tellg();
667
+ buf.reserve(size);
668
+ buf.assign(size, 0);
669
+ is.seekg(0, std::ios_base::beg);
670
+ is.read((char*)(buf.data()), buf.size());
671
+ }
672
+
673
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
674
+ REQUIRE_FALSE(sketch.is_empty());
675
+ REQUIRE(sketch.is_estimation_mode());
676
+ // REQUIRE(sketch.is_ordered()); // v1 may not be ordered
677
+ REQUIRE(sketch.get_num_retained() == 4342);
678
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
679
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
680
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
681
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
682
+
683
+ // the same construction process in Java must have produced exactly the same sketch
684
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
685
+ const int n = 8192;
686
+ for (int i = 0; i < n; i++) update_sketch.update(i);
687
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
688
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
689
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
690
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
691
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
692
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
693
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
694
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
695
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
696
+ compact_theta_sketch compact_sketch = update_sketch.compact();
697
+ // the sketches are ordered, so the iteration sequence must match exactly
698
+ auto iter = sketch.begin();
699
+ for (const auto& key: compact_sketch) {
700
+ REQUIRE(*iter == key);
701
+ ++iter;
702
+ }
703
+ }
704
+
277
705
  } /* namespace datasketches */
@@ -21,6 +21,8 @@
21
21
 
22
22
  #include <theta_union.hpp>
23
23
 
24
+ #include <stdexcept>
25
+
24
26
  namespace datasketches {
25
27
 
26
28
  TEST_CASE("theta union: empty", "[theta_union]") {
@@ -51,35 +53,41 @@ TEST_CASE("theta union: non empty no retained keys", "[theta_union]") {
51
53
  }
52
54
 
53
55
  TEST_CASE("theta union: exact mode half overlap", "[theta_union]") {
54
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
56
+ auto sketch1 = update_theta_sketch::builder().build();
55
57
  int value = 0;
56
58
  for (int i = 0; i < 1000; i++) sketch1.update(value++);
57
59
 
58
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
60
+ auto sketch2 = update_theta_sketch::builder().build();
59
61
  value = 500;
60
62
  for (int i = 0; i < 1000; i++) sketch2.update(value++);
61
63
 
62
- theta_union u = theta_union::builder().build();
64
+ auto u = theta_union::builder().build();
63
65
  u.update(sketch1);
64
66
  u.update(sketch2);
65
- compact_theta_sketch sketch3 = u.get_result();
67
+ auto sketch3 = u.get_result();
66
68
  REQUIRE_FALSE(sketch3.is_empty());
67
69
  REQUIRE_FALSE(sketch3.is_estimation_mode());
68
70
  REQUIRE(sketch3.get_estimate() == 1500.0);
71
+
72
+ u.reset();
73
+ sketch3 = u.get_result();
74
+ REQUIRE(sketch3.get_num_retained() == 0);
75
+ REQUIRE(sketch3.is_empty());
76
+ REQUIRE_FALSE(sketch3.is_estimation_mode());
69
77
  }
70
78
 
71
79
  TEST_CASE("theta union: exact mode half overlap wrapped compact", "[theta_union]") {
72
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
80
+ auto sketch1 = update_theta_sketch::builder().build();
73
81
  int value = 0;
74
82
  for (int i = 0; i < 1000; i++) sketch1.update(value++);
75
83
  auto bytes1 = sketch1.compact().serialize();
76
84
 
77
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
85
+ auto sketch2 = update_theta_sketch::builder().build();
78
86
  value = 500;
79
87
  for (int i = 0; i < 1000; i++) sketch2.update(value++);
80
88
  auto bytes2 = sketch2.compact().serialize();
81
89
 
82
- theta_union u = theta_union::builder().build();
90
+ auto u = theta_union::builder().build();
83
91
  u.update(wrapped_compact_theta_sketch::wrap(bytes1.data(), bytes1.size()));
84
92
  u.update(wrapped_compact_theta_sketch::wrap(bytes2.data(), bytes2.size()));
85
93
  compact_theta_sketch sketch3 = u.get_result();
@@ -89,22 +97,28 @@ TEST_CASE("theta union: exact mode half overlap wrapped compact", "[theta_union]
89
97
  }
90
98
 
91
99
  TEST_CASE("theta union: estimation mode half overlap", "[theta_union]") {
92
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
100
+ auto sketch1 = update_theta_sketch::builder().build();
93
101
  int value = 0;
94
102
  for (int i = 0; i < 10000; i++) sketch1.update(value++);
95
103
 
96
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
104
+ auto sketch2 = update_theta_sketch::builder().build();
97
105
  value = 5000;
98
106
  for (int i = 0; i < 10000; i++) sketch2.update(value++);
99
107
 
100
- theta_union u = theta_union::builder().build();
108
+ auto u = theta_union::builder().build();
101
109
  u.update(sketch1);
102
110
  u.update(sketch2);
103
- compact_theta_sketch sketch3 = u.get_result();
111
+ auto sketch3 = u.get_result();
104
112
  REQUIRE_FALSE(sketch3.is_empty());
105
113
  REQUIRE(sketch3.is_estimation_mode());
106
114
  REQUIRE(sketch3.get_estimate() == Approx(15000).margin(15000 * 0.01));
107
115
  //std::cerr << sketch3.to_string(true);
116
+
117
+ u.reset();
118
+ sketch3 = u.get_result();
119
+ REQUIRE(sketch3.get_num_retained() == 0);
120
+ REQUIRE(sketch3.is_empty());
121
+ REQUIRE_FALSE(sketch3.is_estimation_mode());
108
122
  }
109
123
 
110
124
  TEST_CASE("theta union: seed mismatch", "[theta_union]") {
@@ -32,41 +32,26 @@ target_include_directories(tuple
32
32
  target_link_libraries(tuple INTERFACE common theta)
33
33
  target_compile_features(tuple INTERFACE cxx_std_11)
34
34
 
35
- set(tuple_HEADERS "")
36
- list(APPEND tuple_HEADERS "include/tuple_sketch.hpp;include/tuple_sketch_impl.hpp")
37
- list(APPEND tuple_HEADERS "include/tuple_union.hpp;include/tuple_union_impl.hpp")
38
- list(APPEND tuple_HEADERS "include/tuple_intersection.hpp;include/tuple_intersection_impl.hpp")
39
- list(APPEND tuple_HEADERS "include/tuple_a_not_b.hpp;include/tuple_a_not_b_impl.hpp")
40
- list(APPEND tuple_HEADERS "include/tuple_jaccard_similarity.hpp")
41
- list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
42
- list(APPEND tuple_HEADERS "include/array_of_doubles_union.hpp;include/array_of_doubles_union_impl.hpp")
43
- list(APPEND tuple_HEADERS "include/array_of_doubles_intersection.hpp;include/array_of_doubles_intersection_impl.hpp")
44
- list(APPEND tuple_HEADERS "include/array_of_doubles_a_not_b.hpp;include/array_of_doubles_a_not_b_impl.hpp")
45
-
46
35
  install(TARGETS tuple
47
36
  EXPORT ${PROJECT_NAME}
48
37
  )
49
38
 
50
- install(FILES ${tuple_HEADERS}
39
+ install(FILES
40
+ include/tuple_sketch.hpp
41
+ include/tuple_sketch_impl.hpp
42
+ include/tuple_union.hpp
43
+ include/tuple_union_impl.hpp
44
+ include/tuple_intersection.hpp
45
+ include/tuple_intersection_impl.hpp
46
+ include/tuple_a_not_b.hpp
47
+ include/tuple_a_not_b_impl.hpp
48
+ include/tuple_jaccard_similarity.hpp
49
+ include/array_of_doubles_sketch.hpp
50
+ include/array_of_doubles_sketch_impl.hpp
51
+ include/array_of_doubles_union.hpp
52
+ include/array_of_doubles_union_impl.hpp
53
+ include/array_of_doubles_intersection.hpp
54
+ include/array_of_doubles_intersection_impl.hpp
55
+ include/array_of_doubles_a_not_b.hpp
56
+ include/array_of_doubles_a_not_b_impl.hpp
51
57
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
52
-
53
- target_sources(tuple
54
- INTERFACE
55
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch.hpp
56
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch_impl.hpp
57
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union.hpp
58
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union_impl.hpp
59
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection.hpp
60
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection_impl.hpp
61
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b.hpp
62
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b_impl.hpp
63
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_jaccard_similarity.hpp
64
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
65
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
66
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union.hpp
67
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union_impl.hpp
68
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection.hpp
69
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection_impl.hpp
70
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b.hpp
71
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b_impl.hpp
72
- )