datasketches 0.2.2 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +8 -8
  6. data/ext/datasketches/kll_wrapper.cpp +5 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
  16. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
  18. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  19. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  20. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  21. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  22. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  26. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
  31. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  33. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  34. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  35. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  36. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  38. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  41. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  42. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  44. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  45. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  47. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  48. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  49. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  50. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  51. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  52. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  53. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  54. data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
  55. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  56. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  57. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
  58. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
  59. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
  60. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  61. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  62. data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
  63. data/vendor/datasketches-cpp/python/README.md +57 -50
  64. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  65. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  66. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  67. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  68. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
  69. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  70. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  71. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  72. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
  73. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
  74. data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
  75. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  76. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  77. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  78. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  79. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  80. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  81. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  82. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  83. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  84. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  85. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  86. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  87. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  88. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  89. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  90. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  91. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  92. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  93. data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
  94. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  95. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  96. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  97. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
  98. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  99. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  100. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
  101. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  107. data/vendor/datasketches-cpp/setup.py +10 -7
  108. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  110. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
  114. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
  115. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  116. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
  117. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  118. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
  120. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  121. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
  122. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
  123. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  124. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  125. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  126. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  127. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  130. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  131. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
  132. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
  133. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
  134. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  135. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  136. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  137. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  138. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
  141. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  142. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  143. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  144. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  145. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  146. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  147. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
  148. metadata +34 -12
  149. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  150. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  151. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  152. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
  153. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  154. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -17,8 +17,11 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
+ #include <istream>
20
21
  #include <fstream>
21
22
  #include <sstream>
23
+ #include <vector>
24
+ #include <stdexcept>
22
25
 
23
26
  #include <catch.hpp>
24
27
  #include <theta_sketch.hpp>
@@ -39,6 +42,7 @@ TEST_CASE("theta sketch: empty", "[theta_sketch]") {
39
42
  REQUIRE(update_sketch.get_estimate() == 0.0);
40
43
  REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
41
44
  REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
45
+ REQUIRE(update_sketch.is_ordered());
42
46
 
43
47
  compact_theta_sketch compact_sketch = update_sketch.compact();
44
48
  REQUIRE(compact_sketch.is_empty());
@@ -47,6 +51,10 @@ TEST_CASE("theta sketch: empty", "[theta_sketch]") {
47
51
  REQUIRE(compact_sketch.get_estimate() == 0.0);
48
52
  REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
49
53
  REQUIRE(compact_sketch.get_upper_bound(1) == 0.0);
54
+ REQUIRE(compact_sketch.is_ordered());
55
+
56
+ // empty is forced to be ordered
57
+ REQUIRE(update_sketch.compact(false).is_ordered());
50
58
  }
51
59
 
52
60
  TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
@@ -67,6 +75,14 @@ TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
67
75
  REQUIRE(compact_sketch.get_estimate() == 0.0);
68
76
  REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
69
77
  REQUIRE(compact_sketch.get_upper_bound(1) > 0);
78
+
79
+ update_sketch.reset();
80
+ REQUIRE(update_sketch.is_empty());
81
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
82
+ REQUIRE(update_sketch.get_theta() == 1.0);
83
+ REQUIRE(update_sketch.get_estimate() == 0.0);
84
+ REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
85
+ REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
70
86
  }
71
87
 
72
88
  TEST_CASE("theta sketch: single item", "[theta_sketch]") {
@@ -78,6 +94,7 @@ TEST_CASE("theta sketch: single item", "[theta_sketch]") {
78
94
  REQUIRE(update_sketch.get_estimate() == 1.0);
79
95
  REQUIRE(update_sketch.get_lower_bound(1) == 1.0);
80
96
  REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
97
+ REQUIRE(update_sketch.is_ordered()); // one item is ordered
81
98
 
82
99
  compact_theta_sketch compact_sketch = update_sketch.compact();
83
100
  REQUIRE_FALSE(compact_sketch.is_empty());
@@ -86,6 +103,10 @@ TEST_CASE("theta sketch: single item", "[theta_sketch]") {
86
103
  REQUIRE(compact_sketch.get_estimate() == 1.0);
87
104
  REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
88
105
  REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
106
+ REQUIRE(compact_sketch.is_ordered());
107
+
108
+ // single item is forced to be ordered
109
+ REQUIRE(update_sketch.compact(false).is_ordered());
89
110
  }
90
111
 
91
112
  TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
@@ -97,6 +118,7 @@ TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
97
118
  REQUIRE(update_sketch.get_estimate() == 2000.0);
98
119
  REQUIRE(update_sketch.get_lower_bound(1) == 2000.0);
99
120
  REQUIRE(update_sketch.get_upper_bound(1) == 2000.0);
121
+ REQUIRE_FALSE(update_sketch.is_ordered());
100
122
 
101
123
  compact_theta_sketch compact_sketch = update_sketch.compact();
102
124
  REQUIRE_FALSE(compact_sketch.is_empty());
@@ -105,6 +127,17 @@ TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
105
127
  REQUIRE(compact_sketch.get_estimate() == 2000.0);
106
128
  REQUIRE(compact_sketch.get_lower_bound(1) == 2000.0);
107
129
  REQUIRE(compact_sketch.get_upper_bound(1) == 2000.0);
130
+ REQUIRE(compact_sketch.is_ordered());
131
+
132
+ update_sketch.reset();
133
+ REQUIRE(update_sketch.is_empty());
134
+ REQUIRE_FALSE(update_sketch.is_estimation_mode());
135
+ REQUIRE(update_sketch.get_theta() == 1.0);
136
+ REQUIRE(update_sketch.get_estimate() == 0.0);
137
+ REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
138
+ REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
139
+ REQUIRE(update_sketch.is_ordered());
140
+
108
141
  }
109
142
 
110
143
  TEST_CASE("theta sketch: estimation", "[theta_sketch]") {
@@ -148,6 +181,34 @@ TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]")
148
181
  REQUIRE(sketch.get_upper_bound(1) == 0.0);
149
182
  }
150
183
 
184
+ TEST_CASE("theta sketch: deserialize compact v1 empty from java", "[theta_sketch]") {
185
+ std::ifstream is;
186
+ is.exceptions(std::ios::failbit | std::ios::badbit);
187
+ is.open(inputPath + "theta_compact_empty_from_java_v1.sk", std::ios::binary);
188
+ auto sketch = compact_theta_sketch::deserialize(is);
189
+ REQUIRE(sketch.is_empty());
190
+ REQUIRE_FALSE(sketch.is_estimation_mode());
191
+ REQUIRE(sketch.get_num_retained() == 0);
192
+ REQUIRE(sketch.get_theta() == 1.0);
193
+ REQUIRE(sketch.get_estimate() == 0.0);
194
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
195
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
196
+ }
197
+
198
+ TEST_CASE("theta sketch: deserialize compact v2 empty from java", "[theta_sketch]") {
199
+ std::ifstream is;
200
+ is.exceptions(std::ios::failbit | std::ios::badbit);
201
+ is.open(inputPath + "theta_compact_empty_from_java_v2.sk", std::ios::binary);
202
+ auto sketch = compact_theta_sketch::deserialize(is);
203
+ REQUIRE(sketch.is_empty());
204
+ REQUIRE_FALSE(sketch.is_estimation_mode());
205
+ REQUIRE(sketch.get_num_retained() == 0);
206
+ REQUIRE(sketch.get_theta() == 1.0);
207
+ REQUIRE(sketch.get_estimate() == 0.0);
208
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
209
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
210
+ }
211
+
151
212
  TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
152
213
  std::ifstream is;
153
214
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -162,6 +223,38 @@ TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
162
223
  REQUIRE(sketch.get_upper_bound(1) == 1.0);
163
224
  }
164
225
 
226
+ TEST_CASE("theta sketch: deserialize compact exact from java", "[theta_sketch]") {
227
+ std::ifstream is;
228
+ is.exceptions(std::ios::failbit | std::ios::badbit);
229
+ is.open(inputPath + "theta_compact_exact_from_java.sk", std::ios::binary);
230
+ auto sketch = compact_theta_sketch::deserialize(is);
231
+ REQUIRE_FALSE(sketch.is_empty());
232
+ REQUIRE_FALSE(sketch.is_estimation_mode());
233
+ REQUIRE(sketch.is_ordered());
234
+ REQUIRE(sketch.get_num_retained() == 100);
235
+
236
+ // the same construction process in Java must have produced exactly the same sketch
237
+ auto update_sketch = update_theta_sketch::builder().build();
238
+ const int n = 100;
239
+ for (int i = 0; i < n; i++) update_sketch.update(i);
240
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
241
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
242
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
243
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
244
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
245
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
246
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
247
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
248
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
249
+ compact_theta_sketch compact_sketch = update_sketch.compact();
250
+ // the sketches are ordered, so the iteration sequence must match exactly
251
+ auto iter = sketch.begin();
252
+ for (const auto& key: compact_sketch) {
253
+ REQUIRE(*iter == key);
254
+ ++iter;
255
+ }
256
+ }
257
+
165
258
  TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") {
166
259
  std::ifstream is;
167
260
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -198,6 +291,78 @@ TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sket
198
291
  }
199
292
  }
200
293
 
294
+ TEST_CASE("theta sketch: deserialize compact v1 estimation from java", "[theta_sketch]") {
295
+ std::ifstream is;
296
+ is.exceptions(std::ios::failbit | std::ios::badbit);
297
+ is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary);
298
+ auto sketch = compact_theta_sketch::deserialize(is);
299
+ REQUIRE_FALSE(sketch.is_empty());
300
+ REQUIRE(sketch.is_estimation_mode());
301
+ REQUIRE(sketch.is_ordered());
302
+ REQUIRE(sketch.get_num_retained() == 4342);
303
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
304
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
305
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
306
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
307
+
308
+ // the same construction process in Java must have produced exactly the same sketch
309
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
310
+ const int n = 8192;
311
+ for (int i = 0; i < n; i++) update_sketch.update(i);
312
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
313
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
314
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
315
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
316
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
317
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
318
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
319
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
320
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
321
+ compact_theta_sketch compact_sketch = update_sketch.compact();
322
+ // the sketches are ordered, so the iteration sequence must match exactly
323
+ auto iter = sketch.begin();
324
+ for (const auto& key: compact_sketch) {
325
+ REQUIRE(*iter == key);
326
+ ++iter;
327
+ }
328
+ }
329
+
330
+ TEST_CASE("theta sketch: deserialize compact v2 estimation from java", "[theta_sketch]") {
331
+ std::ifstream is;
332
+ is.exceptions(std::ios::failbit | std::ios::badbit);
333
+ is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary);
334
+ auto sketch = compact_theta_sketch::deserialize(is);
335
+ REQUIRE_FALSE(sketch.is_empty());
336
+ REQUIRE(sketch.is_estimation_mode());
337
+ REQUIRE(sketch.is_ordered());
338
+ REQUIRE(sketch.get_num_retained() == 4342);
339
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
340
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
341
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
342
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
343
+
344
+ // the same construction process in Java must have produced exactly the same sketch
345
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
346
+ const int n = 8192;
347
+ for (int i = 0; i < n; i++) update_sketch.update(i);
348
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
349
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
350
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
351
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
352
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
353
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
354
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
355
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
356
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
357
+ compact_theta_sketch compact_sketch = update_sketch.compact();
358
+ // the sketches are ordered, so the iteration sequence must match exactly
359
+ auto iter = sketch.begin();
360
+ for (const auto& key: compact_sketch) {
361
+ REQUIRE(*iter == key);
362
+ ++iter;
363
+ }
364
+ }
365
+
201
366
  TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[theta_sketch]") {
202
367
  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
203
368
  const int n = 8192;
@@ -230,7 +395,13 @@ TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[
230
395
  }
231
396
  }
232
397
 
233
- TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[theta_sketch]") {
398
+ TEST_CASE("theta sketch: deserialize empty buffer overrun", "[theta_sketch]") {
399
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
400
+ auto bytes = update_sketch.compact().serialize();
401
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
402
+ }
403
+
404
+ TEST_CASE("theta sketch: deserialize single item buffer overrun", "[theta_sketch]") {
234
405
  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
235
406
  update_sketch.update(1);
236
407
  auto bytes = update_sketch.compact().serialize();
@@ -238,6 +409,27 @@ TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[thet
238
409
  REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
239
410
  }
240
411
 
412
+ TEST_CASE("theta sketch: deserialize exact mode buffer overrun", "[theta_sketch]") {
413
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
414
+ for (int i = 0; i < 1000; ++i) update_sketch.update(i);
415
+ auto bytes = update_sketch.compact().serialize();
416
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
417
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 8), std::out_of_range);
418
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 16), std::out_of_range);
419
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
420
+ }
421
+
422
+ TEST_CASE("theta sketch: deserialize estimation mode buffer overrun", "[theta_sketch]") {
423
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
424
+ for (int i = 0; i < 10000; ++i) update_sketch.update(i);
425
+ auto bytes = update_sketch.compact().serialize();
426
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
427
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 8), std::out_of_range);
428
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 16), std::out_of_range);
429
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 24), std::out_of_range);
430
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
431
+ }
432
+
241
433
  TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sketch]") {
242
434
  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
243
435
  const int n = 8192;
@@ -269,9 +461,245 @@ TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sk
269
461
  REQUIRE(*it == entry);
270
462
  ++it;
271
463
  }
464
+ REQUIRE(ordered_compact3.get_estimate() == ordered_compact1.get_estimate());
465
+ REQUIRE(ordered_compact3.get_lower_bound(1) == ordered_compact1.get_lower_bound(1));
466
+ REQUIRE(ordered_compact3.get_upper_bound(1) == ordered_compact1.get_upper_bound(1));
467
+ REQUIRE(ordered_compact3.is_estimation_mode() == ordered_compact1.is_estimation_mode());
468
+ REQUIRE(ordered_compact3.get_theta() == ordered_compact1.get_theta());
469
+
272
470
 
273
471
  // seed mismatch
274
472
  REQUIRE_THROWS_AS(wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size(), 0), std::invalid_argument);
275
473
  }
276
474
 
475
+ TEST_CASE("theta sketch: wrap compact empty from java", "[theta_sketch]") {
476
+ std::ifstream is;
477
+ is.exceptions(std::ios::failbit | std::ios::badbit);
478
+ is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary | std::ios::ate);
479
+
480
+ std::vector<uint8_t> buf;
481
+ if(is) {
482
+ auto size = is.tellg();
483
+ buf.reserve(size);
484
+ buf.assign(size, 0);
485
+ is.seekg(0, std::ios_base::beg);
486
+ is.read((char*)(buf.data()), buf.size());
487
+ }
488
+
489
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
490
+ REQUIRE(sketch.is_empty());
491
+ REQUIRE_FALSE(sketch.is_estimation_mode());
492
+ REQUIRE(sketch.get_num_retained() == 0);
493
+ REQUIRE(sketch.get_theta() == 1.0);
494
+ REQUIRE(sketch.get_estimate() == 0.0);
495
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
496
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
497
+ }
498
+
499
+ TEST_CASE("theta sketch: wrap compact v1 empty from java", "[theta_sketch]") {
500
+ std::ifstream is;
501
+ is.exceptions(std::ios::failbit | std::ios::badbit);
502
+ is.open(inputPath + "theta_compact_empty_from_java_v1.sk", std::ios::binary | std::ios::ate);
503
+
504
+ std::vector<uint8_t> buf;
505
+ if(is) {
506
+ auto size = is.tellg();
507
+ buf.reserve(size);
508
+ buf.assign(size, 0);
509
+ is.seekg(0, std::ios_base::beg);
510
+ is.read((char*)(buf.data()), buf.size());
511
+ }
512
+
513
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
514
+ REQUIRE(sketch.is_empty());
515
+ REQUIRE_FALSE(sketch.is_estimation_mode());
516
+ REQUIRE(sketch.get_num_retained() == 0);
517
+ REQUIRE(sketch.get_theta() == 1.0);
518
+ REQUIRE(sketch.get_estimate() == 0.0);
519
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
520
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
521
+ }
522
+
523
+ TEST_CASE("theta sketch: wrap compact v2 empty from java", "[theta_sketch]") {
524
+ std::ifstream is;
525
+ is.exceptions(std::ios::failbit | std::ios::badbit);
526
+ is.open(inputPath + "theta_compact_empty_from_java_v2.sk", std::ios::binary | std::ios::ate);
527
+
528
+ std::vector<uint8_t> buf;
529
+ if(is) {
530
+ auto size = is.tellg();
531
+ buf.reserve(size);
532
+ buf.assign(size, 0);
533
+ is.seekg(0, std::ios_base::beg);
534
+ is.read((char*)(buf.data()), buf.size());
535
+ }
536
+
537
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
538
+ REQUIRE(sketch.is_empty());
539
+ REQUIRE_FALSE(sketch.is_estimation_mode());
540
+ REQUIRE(sketch.get_num_retained() == 0);
541
+ REQUIRE(sketch.get_theta() == 1.0);
542
+ REQUIRE(sketch.get_estimate() == 0.0);
543
+ REQUIRE(sketch.get_lower_bound(1) == 0.0);
544
+ REQUIRE(sketch.get_upper_bound(1) == 0.0);
545
+ }
546
+
547
+ TEST_CASE("theta sketch: wrap single item from java", "[theta_sketch]") {
548
+ std::ifstream is;
549
+ is.exceptions(std::ios::failbit | std::ios::badbit);
550
+ is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary | std::ios::ate);
551
+ std::vector<uint8_t> buf;
552
+ if(is) {
553
+ auto size = is.tellg();
554
+ buf.reserve(size);
555
+ buf.assign(size, 0);
556
+ is.seekg(0, std::ios_base::beg);
557
+ is.read((char*)(buf.data()), buf.size());
558
+ }
559
+
560
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
561
+ REQUIRE_FALSE(sketch.is_empty());
562
+ REQUIRE_FALSE(sketch.is_estimation_mode());
563
+ REQUIRE(sketch.get_num_retained() == 1);
564
+ REQUIRE(sketch.get_theta() == 1.0);
565
+ REQUIRE(sketch.get_estimate() == 1.0);
566
+ REQUIRE(sketch.get_lower_bound(1) == 1.0);
567
+ REQUIRE(sketch.get_upper_bound(1) == 1.0);
568
+ }
569
+
570
+ TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
571
+ std::ifstream is;
572
+ is.exceptions(std::ios::failbit | std::ios::badbit);
573
+ is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary | std::ios::ate);
574
+ std::vector<uint8_t> buf;
575
+ if(is) {
576
+ auto size = is.tellg();
577
+ buf.reserve(size);
578
+ buf.assign(size, 0);
579
+ is.seekg(0, std::ios_base::beg);
580
+ is.read((char*)(buf.data()), buf.size());
581
+ }
582
+
583
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
584
+ REQUIRE_FALSE(sketch.is_empty());
585
+ REQUIRE(sketch.is_estimation_mode());
586
+ REQUIRE(sketch.is_ordered());
587
+ REQUIRE(sketch.get_num_retained() == 4342);
588
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
589
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
590
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
591
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
592
+
593
+ // the same construction process in Java must have produced exactly the same sketch
594
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
595
+ const int n = 8192;
596
+ for (int i = 0; i < n; i++) update_sketch.update(i);
597
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
598
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
599
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
600
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
601
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
602
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
603
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
604
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
605
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
606
+ compact_theta_sketch compact_sketch = update_sketch.compact();
607
+ // the sketches are ordered, so the iteration sequence must match exactly
608
+ auto iter = sketch.begin();
609
+ for (const auto& key: compact_sketch) {
610
+ REQUIRE(*iter == key);
611
+ ++iter;
612
+ }
613
+ }
614
+
615
+ TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]") {
616
+ std::ifstream is;
617
+ is.exceptions(std::ios::failbit | std::ios::badbit);
618
+ is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary | std::ios::ate);
619
+ std::vector<uint8_t> buf;
620
+ if(is) {
621
+ auto size = is.tellg();
622
+ buf.reserve(size);
623
+ buf.assign(size, 0);
624
+ is.seekg(0, std::ios_base::beg);
625
+ is.read((char*)(buf.data()), buf.size());
626
+ }
627
+
628
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
629
+ REQUIRE_FALSE(sketch.is_empty());
630
+ REQUIRE(sketch.is_estimation_mode());
631
+ // REQUIRE(sketch.is_ordered()); // v1 may not be ordered
632
+ REQUIRE(sketch.get_num_retained() == 4342);
633
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
634
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
635
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
636
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
637
+
638
+ // the same construction process in Java must have produced exactly the same sketch
639
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
640
+ const int n = 8192;
641
+ for (int i = 0; i < n; i++) update_sketch.update(i);
642
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
643
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
644
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
645
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
646
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
647
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
648
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
649
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
650
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
651
+ compact_theta_sketch compact_sketch = update_sketch.compact();
652
+ // the sketches are ordered, so the iteration sequence must match exactly
653
+ auto iter = sketch.begin();
654
+ for (const auto& key: compact_sketch) {
655
+ REQUIRE(*iter == key);
656
+ ++iter;
657
+ }
658
+ }
659
+
660
+ TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]") {
661
+ std::ifstream is;
662
+ is.exceptions(std::ios::failbit | std::ios::badbit);
663
+ is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary | std::ios::ate);
664
+ std::vector<uint8_t> buf;
665
+ if(is) {
666
+ auto size = is.tellg();
667
+ buf.reserve(size);
668
+ buf.assign(size, 0);
669
+ is.seekg(0, std::ios_base::beg);
670
+ is.read((char*)(buf.data()), buf.size());
671
+ }
672
+
673
+ auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
674
+ REQUIRE_FALSE(sketch.is_empty());
675
+ REQUIRE(sketch.is_estimation_mode());
676
+ // REQUIRE(sketch.is_ordered()); // v1 may not be ordered
677
+ REQUIRE(sketch.get_num_retained() == 4342);
678
+ REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
679
+ REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
680
+ REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
681
+ REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
682
+
683
+ // the same construction process in Java must have produced exactly the same sketch
684
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
685
+ const int n = 8192;
686
+ for (int i = 0; i < n; i++) update_sketch.update(i);
687
+ REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
688
+ REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
689
+ REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
690
+ REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
691
+ REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
692
+ REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
693
+ REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
694
+ REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
695
+ REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
696
+ compact_theta_sketch compact_sketch = update_sketch.compact();
697
+ // the sketches are ordered, so the iteration sequence must match exactly
698
+ auto iter = sketch.begin();
699
+ for (const auto& key: compact_sketch) {
700
+ REQUIRE(*iter == key);
701
+ ++iter;
702
+ }
703
+ }
704
+
277
705
  } /* namespace datasketches */
@@ -21,6 +21,8 @@
21
21
 
22
22
  #include <theta_union.hpp>
23
23
 
24
+ #include <stdexcept>
25
+
24
26
  namespace datasketches {
25
27
 
26
28
  TEST_CASE("theta union: empty", "[theta_union]") {
@@ -51,35 +53,41 @@ TEST_CASE("theta union: non empty no retained keys", "[theta_union]") {
51
53
  }
52
54
 
53
55
  TEST_CASE("theta union: exact mode half overlap", "[theta_union]") {
54
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
56
+ auto sketch1 = update_theta_sketch::builder().build();
55
57
  int value = 0;
56
58
  for (int i = 0; i < 1000; i++) sketch1.update(value++);
57
59
 
58
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
60
+ auto sketch2 = update_theta_sketch::builder().build();
59
61
  value = 500;
60
62
  for (int i = 0; i < 1000; i++) sketch2.update(value++);
61
63
 
62
- theta_union u = theta_union::builder().build();
64
+ auto u = theta_union::builder().build();
63
65
  u.update(sketch1);
64
66
  u.update(sketch2);
65
- compact_theta_sketch sketch3 = u.get_result();
67
+ auto sketch3 = u.get_result();
66
68
  REQUIRE_FALSE(sketch3.is_empty());
67
69
  REQUIRE_FALSE(sketch3.is_estimation_mode());
68
70
  REQUIRE(sketch3.get_estimate() == 1500.0);
71
+
72
+ u.reset();
73
+ sketch3 = u.get_result();
74
+ REQUIRE(sketch3.get_num_retained() == 0);
75
+ REQUIRE(sketch3.is_empty());
76
+ REQUIRE_FALSE(sketch3.is_estimation_mode());
69
77
  }
70
78
 
71
79
  TEST_CASE("theta union: exact mode half overlap wrapped compact", "[theta_union]") {
72
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
80
+ auto sketch1 = update_theta_sketch::builder().build();
73
81
  int value = 0;
74
82
  for (int i = 0; i < 1000; i++) sketch1.update(value++);
75
83
  auto bytes1 = sketch1.compact().serialize();
76
84
 
77
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
85
+ auto sketch2 = update_theta_sketch::builder().build();
78
86
  value = 500;
79
87
  for (int i = 0; i < 1000; i++) sketch2.update(value++);
80
88
  auto bytes2 = sketch2.compact().serialize();
81
89
 
82
- theta_union u = theta_union::builder().build();
90
+ auto u = theta_union::builder().build();
83
91
  u.update(wrapped_compact_theta_sketch::wrap(bytes1.data(), bytes1.size()));
84
92
  u.update(wrapped_compact_theta_sketch::wrap(bytes2.data(), bytes2.size()));
85
93
  compact_theta_sketch sketch3 = u.get_result();
@@ -89,22 +97,28 @@ TEST_CASE("theta union: exact mode half overlap wrapped compact", "[theta_union]
89
97
  }
90
98
 
91
99
  TEST_CASE("theta union: estimation mode half overlap", "[theta_union]") {
92
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
100
+ auto sketch1 = update_theta_sketch::builder().build();
93
101
  int value = 0;
94
102
  for (int i = 0; i < 10000; i++) sketch1.update(value++);
95
103
 
96
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
104
+ auto sketch2 = update_theta_sketch::builder().build();
97
105
  value = 5000;
98
106
  for (int i = 0; i < 10000; i++) sketch2.update(value++);
99
107
 
100
- theta_union u = theta_union::builder().build();
108
+ auto u = theta_union::builder().build();
101
109
  u.update(sketch1);
102
110
  u.update(sketch2);
103
- compact_theta_sketch sketch3 = u.get_result();
111
+ auto sketch3 = u.get_result();
104
112
  REQUIRE_FALSE(sketch3.is_empty());
105
113
  REQUIRE(sketch3.is_estimation_mode());
106
114
  REQUIRE(sketch3.get_estimate() == Approx(15000).margin(15000 * 0.01));
107
115
  //std::cerr << sketch3.to_string(true);
116
+
117
+ u.reset();
118
+ sketch3 = u.get_result();
119
+ REQUIRE(sketch3.get_num_retained() == 0);
120
+ REQUIRE(sketch3.is_empty());
121
+ REQUIRE_FALSE(sketch3.is_estimation_mode());
108
122
  }
109
123
 
110
124
  TEST_CASE("theta union: seed mismatch", "[theta_union]") {
@@ -32,41 +32,26 @@ target_include_directories(tuple
32
32
  target_link_libraries(tuple INTERFACE common theta)
33
33
  target_compile_features(tuple INTERFACE cxx_std_11)
34
34
 
35
- set(tuple_HEADERS "")
36
- list(APPEND tuple_HEADERS "include/tuple_sketch.hpp;include/tuple_sketch_impl.hpp")
37
- list(APPEND tuple_HEADERS "include/tuple_union.hpp;include/tuple_union_impl.hpp")
38
- list(APPEND tuple_HEADERS "include/tuple_intersection.hpp;include/tuple_intersection_impl.hpp")
39
- list(APPEND tuple_HEADERS "include/tuple_a_not_b.hpp;include/tuple_a_not_b_impl.hpp")
40
- list(APPEND tuple_HEADERS "include/tuple_jaccard_similarity.hpp")
41
- list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
42
- list(APPEND tuple_HEADERS "include/array_of_doubles_union.hpp;include/array_of_doubles_union_impl.hpp")
43
- list(APPEND tuple_HEADERS "include/array_of_doubles_intersection.hpp;include/array_of_doubles_intersection_impl.hpp")
44
- list(APPEND tuple_HEADERS "include/array_of_doubles_a_not_b.hpp;include/array_of_doubles_a_not_b_impl.hpp")
45
-
46
35
  install(TARGETS tuple
47
36
  EXPORT ${PROJECT_NAME}
48
37
  )
49
38
 
50
- install(FILES ${tuple_HEADERS}
39
+ install(FILES
40
+ include/tuple_sketch.hpp
41
+ include/tuple_sketch_impl.hpp
42
+ include/tuple_union.hpp
43
+ include/tuple_union_impl.hpp
44
+ include/tuple_intersection.hpp
45
+ include/tuple_intersection_impl.hpp
46
+ include/tuple_a_not_b.hpp
47
+ include/tuple_a_not_b_impl.hpp
48
+ include/tuple_jaccard_similarity.hpp
49
+ include/array_of_doubles_sketch.hpp
50
+ include/array_of_doubles_sketch_impl.hpp
51
+ include/array_of_doubles_union.hpp
52
+ include/array_of_doubles_union_impl.hpp
53
+ include/array_of_doubles_intersection.hpp
54
+ include/array_of_doubles_intersection_impl.hpp
55
+ include/array_of_doubles_a_not_b.hpp
56
+ include/array_of_doubles_a_not_b_impl.hpp
51
57
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
52
-
53
- target_sources(tuple
54
- INTERFACE
55
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch.hpp
56
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch_impl.hpp
57
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union.hpp
58
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union_impl.hpp
59
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection.hpp
60
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection_impl.hpp
61
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b.hpp
62
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b_impl.hpp
63
- ${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_jaccard_similarity.hpp
64
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
65
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
66
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union.hpp
67
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union_impl.hpp
68
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection.hpp
69
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection_impl.hpp
70
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b.hpp
71
- ${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b_impl.hpp
72
- )