datasketches 0.2.2 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +8 -8
- data/ext/datasketches/kll_wrapper.cpp +5 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
- data/vendor/datasketches-cpp/python/README.md +57 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
- data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
- data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
- metadata +34 -12
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -17,8 +17,11 @@
|
|
|
17
17
|
* under the License.
|
|
18
18
|
*/
|
|
19
19
|
|
|
20
|
+
#include <istream>
|
|
20
21
|
#include <fstream>
|
|
21
22
|
#include <sstream>
|
|
23
|
+
#include <vector>
|
|
24
|
+
#include <stdexcept>
|
|
22
25
|
|
|
23
26
|
#include <catch.hpp>
|
|
24
27
|
#include <theta_sketch.hpp>
|
|
@@ -39,6 +42,7 @@ TEST_CASE("theta sketch: empty", "[theta_sketch]") {
|
|
|
39
42
|
REQUIRE(update_sketch.get_estimate() == 0.0);
|
|
40
43
|
REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
|
|
41
44
|
REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
|
|
45
|
+
REQUIRE(update_sketch.is_ordered());
|
|
42
46
|
|
|
43
47
|
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
44
48
|
REQUIRE(compact_sketch.is_empty());
|
|
@@ -47,6 +51,10 @@ TEST_CASE("theta sketch: empty", "[theta_sketch]") {
|
|
|
47
51
|
REQUIRE(compact_sketch.get_estimate() == 0.0);
|
|
48
52
|
REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
|
|
49
53
|
REQUIRE(compact_sketch.get_upper_bound(1) == 0.0);
|
|
54
|
+
REQUIRE(compact_sketch.is_ordered());
|
|
55
|
+
|
|
56
|
+
// empty is forced to be ordered
|
|
57
|
+
REQUIRE(update_sketch.compact(false).is_ordered());
|
|
50
58
|
}
|
|
51
59
|
|
|
52
60
|
TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
|
|
@@ -67,6 +75,14 @@ TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
|
|
|
67
75
|
REQUIRE(compact_sketch.get_estimate() == 0.0);
|
|
68
76
|
REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
|
|
69
77
|
REQUIRE(compact_sketch.get_upper_bound(1) > 0);
|
|
78
|
+
|
|
79
|
+
update_sketch.reset();
|
|
80
|
+
REQUIRE(update_sketch.is_empty());
|
|
81
|
+
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
|
82
|
+
REQUIRE(update_sketch.get_theta() == 1.0);
|
|
83
|
+
REQUIRE(update_sketch.get_estimate() == 0.0);
|
|
84
|
+
REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
|
|
85
|
+
REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
|
|
70
86
|
}
|
|
71
87
|
|
|
72
88
|
TEST_CASE("theta sketch: single item", "[theta_sketch]") {
|
|
@@ -78,6 +94,7 @@ TEST_CASE("theta sketch: single item", "[theta_sketch]") {
|
|
|
78
94
|
REQUIRE(update_sketch.get_estimate() == 1.0);
|
|
79
95
|
REQUIRE(update_sketch.get_lower_bound(1) == 1.0);
|
|
80
96
|
REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
|
|
97
|
+
REQUIRE(update_sketch.is_ordered()); // one item is ordered
|
|
81
98
|
|
|
82
99
|
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
83
100
|
REQUIRE_FALSE(compact_sketch.is_empty());
|
|
@@ -86,6 +103,10 @@ TEST_CASE("theta sketch: single item", "[theta_sketch]") {
|
|
|
86
103
|
REQUIRE(compact_sketch.get_estimate() == 1.0);
|
|
87
104
|
REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
|
|
88
105
|
REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
|
|
106
|
+
REQUIRE(compact_sketch.is_ordered());
|
|
107
|
+
|
|
108
|
+
// single item is forced to be ordered
|
|
109
|
+
REQUIRE(update_sketch.compact(false).is_ordered());
|
|
89
110
|
}
|
|
90
111
|
|
|
91
112
|
TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
|
|
@@ -97,6 +118,7 @@ TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
|
|
|
97
118
|
REQUIRE(update_sketch.get_estimate() == 2000.0);
|
|
98
119
|
REQUIRE(update_sketch.get_lower_bound(1) == 2000.0);
|
|
99
120
|
REQUIRE(update_sketch.get_upper_bound(1) == 2000.0);
|
|
121
|
+
REQUIRE_FALSE(update_sketch.is_ordered());
|
|
100
122
|
|
|
101
123
|
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
102
124
|
REQUIRE_FALSE(compact_sketch.is_empty());
|
|
@@ -105,6 +127,17 @@ TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
|
|
|
105
127
|
REQUIRE(compact_sketch.get_estimate() == 2000.0);
|
|
106
128
|
REQUIRE(compact_sketch.get_lower_bound(1) == 2000.0);
|
|
107
129
|
REQUIRE(compact_sketch.get_upper_bound(1) == 2000.0);
|
|
130
|
+
REQUIRE(compact_sketch.is_ordered());
|
|
131
|
+
|
|
132
|
+
update_sketch.reset();
|
|
133
|
+
REQUIRE(update_sketch.is_empty());
|
|
134
|
+
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
|
135
|
+
REQUIRE(update_sketch.get_theta() == 1.0);
|
|
136
|
+
REQUIRE(update_sketch.get_estimate() == 0.0);
|
|
137
|
+
REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
|
|
138
|
+
REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
|
|
139
|
+
REQUIRE(update_sketch.is_ordered());
|
|
140
|
+
|
|
108
141
|
}
|
|
109
142
|
|
|
110
143
|
TEST_CASE("theta sketch: estimation", "[theta_sketch]") {
|
|
@@ -148,6 +181,34 @@ TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]")
|
|
|
148
181
|
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
149
182
|
}
|
|
150
183
|
|
|
184
|
+
TEST_CASE("theta sketch: deserialize compact v1 empty from java", "[theta_sketch]") {
|
|
185
|
+
std::ifstream is;
|
|
186
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
187
|
+
is.open(inputPath + "theta_compact_empty_from_java_v1.sk", std::ios::binary);
|
|
188
|
+
auto sketch = compact_theta_sketch::deserialize(is);
|
|
189
|
+
REQUIRE(sketch.is_empty());
|
|
190
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
191
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
192
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
193
|
+
REQUIRE(sketch.get_estimate() == 0.0);
|
|
194
|
+
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
195
|
+
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
TEST_CASE("theta sketch: deserialize compact v2 empty from java", "[theta_sketch]") {
|
|
199
|
+
std::ifstream is;
|
|
200
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
201
|
+
is.open(inputPath + "theta_compact_empty_from_java_v2.sk", std::ios::binary);
|
|
202
|
+
auto sketch = compact_theta_sketch::deserialize(is);
|
|
203
|
+
REQUIRE(sketch.is_empty());
|
|
204
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
205
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
206
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
207
|
+
REQUIRE(sketch.get_estimate() == 0.0);
|
|
208
|
+
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
209
|
+
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
210
|
+
}
|
|
211
|
+
|
|
151
212
|
TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
|
|
152
213
|
std::ifstream is;
|
|
153
214
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
@@ -162,6 +223,38 @@ TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
|
|
|
162
223
|
REQUIRE(sketch.get_upper_bound(1) == 1.0);
|
|
163
224
|
}
|
|
164
225
|
|
|
226
|
+
TEST_CASE("theta sketch: deserialize compact exact from java", "[theta_sketch]") {
|
|
227
|
+
std::ifstream is;
|
|
228
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
229
|
+
is.open(inputPath + "theta_compact_exact_from_java.sk", std::ios::binary);
|
|
230
|
+
auto sketch = compact_theta_sketch::deserialize(is);
|
|
231
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
232
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
233
|
+
REQUIRE(sketch.is_ordered());
|
|
234
|
+
REQUIRE(sketch.get_num_retained() == 100);
|
|
235
|
+
|
|
236
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
237
|
+
auto update_sketch = update_theta_sketch::builder().build();
|
|
238
|
+
const int n = 100;
|
|
239
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
240
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
241
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
242
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
243
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
244
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
245
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
246
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
247
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
248
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
249
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
250
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
251
|
+
auto iter = sketch.begin();
|
|
252
|
+
for (const auto& key: compact_sketch) {
|
|
253
|
+
REQUIRE(*iter == key);
|
|
254
|
+
++iter;
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
165
258
|
TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") {
|
|
166
259
|
std::ifstream is;
|
|
167
260
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
@@ -198,6 +291,78 @@ TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sket
|
|
|
198
291
|
}
|
|
199
292
|
}
|
|
200
293
|
|
|
294
|
+
TEST_CASE("theta sketch: deserialize compact v1 estimation from java", "[theta_sketch]") {
|
|
295
|
+
std::ifstream is;
|
|
296
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
297
|
+
is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary);
|
|
298
|
+
auto sketch = compact_theta_sketch::deserialize(is);
|
|
299
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
300
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
301
|
+
REQUIRE(sketch.is_ordered());
|
|
302
|
+
REQUIRE(sketch.get_num_retained() == 4342);
|
|
303
|
+
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
304
|
+
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
305
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
306
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
307
|
+
|
|
308
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
309
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
310
|
+
const int n = 8192;
|
|
311
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
312
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
313
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
314
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
315
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
316
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
317
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
318
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
319
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
320
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
321
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
322
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
323
|
+
auto iter = sketch.begin();
|
|
324
|
+
for (const auto& key: compact_sketch) {
|
|
325
|
+
REQUIRE(*iter == key);
|
|
326
|
+
++iter;
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
TEST_CASE("theta sketch: deserialize compact v2 estimation from java", "[theta_sketch]") {
|
|
331
|
+
std::ifstream is;
|
|
332
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
333
|
+
is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary);
|
|
334
|
+
auto sketch = compact_theta_sketch::deserialize(is);
|
|
335
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
336
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
337
|
+
REQUIRE(sketch.is_ordered());
|
|
338
|
+
REQUIRE(sketch.get_num_retained() == 4342);
|
|
339
|
+
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
340
|
+
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
341
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
342
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
343
|
+
|
|
344
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
345
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
346
|
+
const int n = 8192;
|
|
347
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
348
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
349
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
350
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
351
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
352
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
353
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
354
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
355
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
356
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
357
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
358
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
359
|
+
auto iter = sketch.begin();
|
|
360
|
+
for (const auto& key: compact_sketch) {
|
|
361
|
+
REQUIRE(*iter == key);
|
|
362
|
+
++iter;
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
|
|
201
366
|
TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[theta_sketch]") {
|
|
202
367
|
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
203
368
|
const int n = 8192;
|
|
@@ -230,7 +395,13 @@ TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[
|
|
|
230
395
|
}
|
|
231
396
|
}
|
|
232
397
|
|
|
233
|
-
TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[theta_sketch]") {
|
|
398
|
+
TEST_CASE("theta sketch: deserialize empty buffer overrun", "[theta_sketch]") {
|
|
399
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
400
|
+
auto bytes = update_sketch.compact().serialize();
|
|
401
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
TEST_CASE("theta sketch: deserialize single item buffer overrun", "[theta_sketch]") {
|
|
234
405
|
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
235
406
|
update_sketch.update(1);
|
|
236
407
|
auto bytes = update_sketch.compact().serialize();
|
|
@@ -238,6 +409,27 @@ TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[thet
|
|
|
238
409
|
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
|
239
410
|
}
|
|
240
411
|
|
|
412
|
+
TEST_CASE("theta sketch: deserialize exact mode buffer overrun", "[theta_sketch]") {
|
|
413
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
414
|
+
for (int i = 0; i < 1000; ++i) update_sketch.update(i);
|
|
415
|
+
auto bytes = update_sketch.compact().serialize();
|
|
416
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
|
|
417
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 8), std::out_of_range);
|
|
418
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 16), std::out_of_range);
|
|
419
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
TEST_CASE("theta sketch: deserialize estimation mode buffer overrun", "[theta_sketch]") {
|
|
423
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
424
|
+
for (int i = 0; i < 10000; ++i) update_sketch.update(i);
|
|
425
|
+
auto bytes = update_sketch.compact().serialize();
|
|
426
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
|
|
427
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 8), std::out_of_range);
|
|
428
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 16), std::out_of_range);
|
|
429
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 24), std::out_of_range);
|
|
430
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
|
431
|
+
}
|
|
432
|
+
|
|
241
433
|
TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sketch]") {
|
|
242
434
|
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
243
435
|
const int n = 8192;
|
|
@@ -269,9 +461,245 @@ TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sk
|
|
|
269
461
|
REQUIRE(*it == entry);
|
|
270
462
|
++it;
|
|
271
463
|
}
|
|
464
|
+
REQUIRE(ordered_compact3.get_estimate() == ordered_compact1.get_estimate());
|
|
465
|
+
REQUIRE(ordered_compact3.get_lower_bound(1) == ordered_compact1.get_lower_bound(1));
|
|
466
|
+
REQUIRE(ordered_compact3.get_upper_bound(1) == ordered_compact1.get_upper_bound(1));
|
|
467
|
+
REQUIRE(ordered_compact3.is_estimation_mode() == ordered_compact1.is_estimation_mode());
|
|
468
|
+
REQUIRE(ordered_compact3.get_theta() == ordered_compact1.get_theta());
|
|
469
|
+
|
|
272
470
|
|
|
273
471
|
// seed mismatch
|
|
274
472
|
REQUIRE_THROWS_AS(wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size(), 0), std::invalid_argument);
|
|
275
473
|
}
|
|
276
474
|
|
|
475
|
+
TEST_CASE("theta sketch: wrap compact empty from java", "[theta_sketch]") {
|
|
476
|
+
std::ifstream is;
|
|
477
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
478
|
+
is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary | std::ios::ate);
|
|
479
|
+
|
|
480
|
+
std::vector<uint8_t> buf;
|
|
481
|
+
if(is) {
|
|
482
|
+
auto size = is.tellg();
|
|
483
|
+
buf.reserve(size);
|
|
484
|
+
buf.assign(size, 0);
|
|
485
|
+
is.seekg(0, std::ios_base::beg);
|
|
486
|
+
is.read((char*)(buf.data()), buf.size());
|
|
487
|
+
}
|
|
488
|
+
|
|
489
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
490
|
+
REQUIRE(sketch.is_empty());
|
|
491
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
492
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
493
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
494
|
+
REQUIRE(sketch.get_estimate() == 0.0);
|
|
495
|
+
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
496
|
+
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
TEST_CASE("theta sketch: wrap compact v1 empty from java", "[theta_sketch]") {
|
|
500
|
+
std::ifstream is;
|
|
501
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
502
|
+
is.open(inputPath + "theta_compact_empty_from_java_v1.sk", std::ios::binary | std::ios::ate);
|
|
503
|
+
|
|
504
|
+
std::vector<uint8_t> buf;
|
|
505
|
+
if(is) {
|
|
506
|
+
auto size = is.tellg();
|
|
507
|
+
buf.reserve(size);
|
|
508
|
+
buf.assign(size, 0);
|
|
509
|
+
is.seekg(0, std::ios_base::beg);
|
|
510
|
+
is.read((char*)(buf.data()), buf.size());
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
514
|
+
REQUIRE(sketch.is_empty());
|
|
515
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
516
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
517
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
518
|
+
REQUIRE(sketch.get_estimate() == 0.0);
|
|
519
|
+
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
520
|
+
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
TEST_CASE("theta sketch: wrap compact v2 empty from java", "[theta_sketch]") {
|
|
524
|
+
std::ifstream is;
|
|
525
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
526
|
+
is.open(inputPath + "theta_compact_empty_from_java_v2.sk", std::ios::binary | std::ios::ate);
|
|
527
|
+
|
|
528
|
+
std::vector<uint8_t> buf;
|
|
529
|
+
if(is) {
|
|
530
|
+
auto size = is.tellg();
|
|
531
|
+
buf.reserve(size);
|
|
532
|
+
buf.assign(size, 0);
|
|
533
|
+
is.seekg(0, std::ios_base::beg);
|
|
534
|
+
is.read((char*)(buf.data()), buf.size());
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
538
|
+
REQUIRE(sketch.is_empty());
|
|
539
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
540
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
541
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
542
|
+
REQUIRE(sketch.get_estimate() == 0.0);
|
|
543
|
+
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
544
|
+
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
TEST_CASE("theta sketch: wrap single item from java", "[theta_sketch]") {
|
|
548
|
+
std::ifstream is;
|
|
549
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
550
|
+
is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary | std::ios::ate);
|
|
551
|
+
std::vector<uint8_t> buf;
|
|
552
|
+
if(is) {
|
|
553
|
+
auto size = is.tellg();
|
|
554
|
+
buf.reserve(size);
|
|
555
|
+
buf.assign(size, 0);
|
|
556
|
+
is.seekg(0, std::ios_base::beg);
|
|
557
|
+
is.read((char*)(buf.data()), buf.size());
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
561
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
562
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
563
|
+
REQUIRE(sketch.get_num_retained() == 1);
|
|
564
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
565
|
+
REQUIRE(sketch.get_estimate() == 1.0);
|
|
566
|
+
REQUIRE(sketch.get_lower_bound(1) == 1.0);
|
|
567
|
+
REQUIRE(sketch.get_upper_bound(1) == 1.0);
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
|
|
571
|
+
std::ifstream is;
|
|
572
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
573
|
+
is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary | std::ios::ate);
|
|
574
|
+
std::vector<uint8_t> buf;
|
|
575
|
+
if(is) {
|
|
576
|
+
auto size = is.tellg();
|
|
577
|
+
buf.reserve(size);
|
|
578
|
+
buf.assign(size, 0);
|
|
579
|
+
is.seekg(0, std::ios_base::beg);
|
|
580
|
+
is.read((char*)(buf.data()), buf.size());
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
584
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
585
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
586
|
+
REQUIRE(sketch.is_ordered());
|
|
587
|
+
REQUIRE(sketch.get_num_retained() == 4342);
|
|
588
|
+
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
589
|
+
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
590
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
591
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
592
|
+
|
|
593
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
594
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
595
|
+
const int n = 8192;
|
|
596
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
597
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
598
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
599
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
600
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
601
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
602
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
603
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
604
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
605
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
606
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
607
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
608
|
+
auto iter = sketch.begin();
|
|
609
|
+
for (const auto& key: compact_sketch) {
|
|
610
|
+
REQUIRE(*iter == key);
|
|
611
|
+
++iter;
|
|
612
|
+
}
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]") {
|
|
616
|
+
std::ifstream is;
|
|
617
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
618
|
+
is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary | std::ios::ate);
|
|
619
|
+
std::vector<uint8_t> buf;
|
|
620
|
+
if(is) {
|
|
621
|
+
auto size = is.tellg();
|
|
622
|
+
buf.reserve(size);
|
|
623
|
+
buf.assign(size, 0);
|
|
624
|
+
is.seekg(0, std::ios_base::beg);
|
|
625
|
+
is.read((char*)(buf.data()), buf.size());
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
629
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
630
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
631
|
+
// REQUIRE(sketch.is_ordered()); // v1 may not be ordered
|
|
632
|
+
REQUIRE(sketch.get_num_retained() == 4342);
|
|
633
|
+
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
634
|
+
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
635
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
636
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
637
|
+
|
|
638
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
639
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
640
|
+
const int n = 8192;
|
|
641
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
642
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
643
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
644
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
645
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
646
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
647
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
648
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
649
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
650
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
651
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
652
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
653
|
+
auto iter = sketch.begin();
|
|
654
|
+
for (const auto& key: compact_sketch) {
|
|
655
|
+
REQUIRE(*iter == key);
|
|
656
|
+
++iter;
|
|
657
|
+
}
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]") {
|
|
661
|
+
std::ifstream is;
|
|
662
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
663
|
+
is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary | std::ios::ate);
|
|
664
|
+
std::vector<uint8_t> buf;
|
|
665
|
+
if(is) {
|
|
666
|
+
auto size = is.tellg();
|
|
667
|
+
buf.reserve(size);
|
|
668
|
+
buf.assign(size, 0);
|
|
669
|
+
is.seekg(0, std::ios_base::beg);
|
|
670
|
+
is.read((char*)(buf.data()), buf.size());
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
674
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
675
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
676
|
+
// REQUIRE(sketch.is_ordered()); // v1 may not be ordered
|
|
677
|
+
REQUIRE(sketch.get_num_retained() == 4342);
|
|
678
|
+
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
679
|
+
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
680
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
681
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
682
|
+
|
|
683
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
684
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
685
|
+
const int n = 8192;
|
|
686
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
687
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
688
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
689
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
690
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
691
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
692
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
693
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
694
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
695
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
696
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
697
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
698
|
+
auto iter = sketch.begin();
|
|
699
|
+
for (const auto& key: compact_sketch) {
|
|
700
|
+
REQUIRE(*iter == key);
|
|
701
|
+
++iter;
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
|
|
277
705
|
} /* namespace datasketches */
|
|
@@ -21,6 +21,8 @@
|
|
|
21
21
|
|
|
22
22
|
#include <theta_union.hpp>
|
|
23
23
|
|
|
24
|
+
#include <stdexcept>
|
|
25
|
+
|
|
24
26
|
namespace datasketches {
|
|
25
27
|
|
|
26
28
|
TEST_CASE("theta union: empty", "[theta_union]") {
|
|
@@ -51,35 +53,41 @@ TEST_CASE("theta union: non empty no retained keys", "[theta_union]") {
|
|
|
51
53
|
}
|
|
52
54
|
|
|
53
55
|
TEST_CASE("theta union: exact mode half overlap", "[theta_union]") {
|
|
54
|
-
|
|
56
|
+
auto sketch1 = update_theta_sketch::builder().build();
|
|
55
57
|
int value = 0;
|
|
56
58
|
for (int i = 0; i < 1000; i++) sketch1.update(value++);
|
|
57
59
|
|
|
58
|
-
|
|
60
|
+
auto sketch2 = update_theta_sketch::builder().build();
|
|
59
61
|
value = 500;
|
|
60
62
|
for (int i = 0; i < 1000; i++) sketch2.update(value++);
|
|
61
63
|
|
|
62
|
-
|
|
64
|
+
auto u = theta_union::builder().build();
|
|
63
65
|
u.update(sketch1);
|
|
64
66
|
u.update(sketch2);
|
|
65
|
-
|
|
67
|
+
auto sketch3 = u.get_result();
|
|
66
68
|
REQUIRE_FALSE(sketch3.is_empty());
|
|
67
69
|
REQUIRE_FALSE(sketch3.is_estimation_mode());
|
|
68
70
|
REQUIRE(sketch3.get_estimate() == 1500.0);
|
|
71
|
+
|
|
72
|
+
u.reset();
|
|
73
|
+
sketch3 = u.get_result();
|
|
74
|
+
REQUIRE(sketch3.get_num_retained() == 0);
|
|
75
|
+
REQUIRE(sketch3.is_empty());
|
|
76
|
+
REQUIRE_FALSE(sketch3.is_estimation_mode());
|
|
69
77
|
}
|
|
70
78
|
|
|
71
79
|
TEST_CASE("theta union: exact mode half overlap wrapped compact", "[theta_union]") {
|
|
72
|
-
|
|
80
|
+
auto sketch1 = update_theta_sketch::builder().build();
|
|
73
81
|
int value = 0;
|
|
74
82
|
for (int i = 0; i < 1000; i++) sketch1.update(value++);
|
|
75
83
|
auto bytes1 = sketch1.compact().serialize();
|
|
76
84
|
|
|
77
|
-
|
|
85
|
+
auto sketch2 = update_theta_sketch::builder().build();
|
|
78
86
|
value = 500;
|
|
79
87
|
for (int i = 0; i < 1000; i++) sketch2.update(value++);
|
|
80
88
|
auto bytes2 = sketch2.compact().serialize();
|
|
81
89
|
|
|
82
|
-
|
|
90
|
+
auto u = theta_union::builder().build();
|
|
83
91
|
u.update(wrapped_compact_theta_sketch::wrap(bytes1.data(), bytes1.size()));
|
|
84
92
|
u.update(wrapped_compact_theta_sketch::wrap(bytes2.data(), bytes2.size()));
|
|
85
93
|
compact_theta_sketch sketch3 = u.get_result();
|
|
@@ -89,22 +97,28 @@ TEST_CASE("theta union: exact mode half overlap wrapped compact", "[theta_union]
|
|
|
89
97
|
}
|
|
90
98
|
|
|
91
99
|
TEST_CASE("theta union: estimation mode half overlap", "[theta_union]") {
|
|
92
|
-
|
|
100
|
+
auto sketch1 = update_theta_sketch::builder().build();
|
|
93
101
|
int value = 0;
|
|
94
102
|
for (int i = 0; i < 10000; i++) sketch1.update(value++);
|
|
95
103
|
|
|
96
|
-
|
|
104
|
+
auto sketch2 = update_theta_sketch::builder().build();
|
|
97
105
|
value = 5000;
|
|
98
106
|
for (int i = 0; i < 10000; i++) sketch2.update(value++);
|
|
99
107
|
|
|
100
|
-
|
|
108
|
+
auto u = theta_union::builder().build();
|
|
101
109
|
u.update(sketch1);
|
|
102
110
|
u.update(sketch2);
|
|
103
|
-
|
|
111
|
+
auto sketch3 = u.get_result();
|
|
104
112
|
REQUIRE_FALSE(sketch3.is_empty());
|
|
105
113
|
REQUIRE(sketch3.is_estimation_mode());
|
|
106
114
|
REQUIRE(sketch3.get_estimate() == Approx(15000).margin(15000 * 0.01));
|
|
107
115
|
//std::cerr << sketch3.to_string(true);
|
|
116
|
+
|
|
117
|
+
u.reset();
|
|
118
|
+
sketch3 = u.get_result();
|
|
119
|
+
REQUIRE(sketch3.get_num_retained() == 0);
|
|
120
|
+
REQUIRE(sketch3.is_empty());
|
|
121
|
+
REQUIRE_FALSE(sketch3.is_estimation_mode());
|
|
108
122
|
}
|
|
109
123
|
|
|
110
124
|
TEST_CASE("theta union: seed mismatch", "[theta_union]") {
|
|
@@ -32,41 +32,26 @@ target_include_directories(tuple
|
|
|
32
32
|
target_link_libraries(tuple INTERFACE common theta)
|
|
33
33
|
target_compile_features(tuple INTERFACE cxx_std_11)
|
|
34
34
|
|
|
35
|
-
set(tuple_HEADERS "")
|
|
36
|
-
list(APPEND tuple_HEADERS "include/tuple_sketch.hpp;include/tuple_sketch_impl.hpp")
|
|
37
|
-
list(APPEND tuple_HEADERS "include/tuple_union.hpp;include/tuple_union_impl.hpp")
|
|
38
|
-
list(APPEND tuple_HEADERS "include/tuple_intersection.hpp;include/tuple_intersection_impl.hpp")
|
|
39
|
-
list(APPEND tuple_HEADERS "include/tuple_a_not_b.hpp;include/tuple_a_not_b_impl.hpp")
|
|
40
|
-
list(APPEND tuple_HEADERS "include/tuple_jaccard_similarity.hpp")
|
|
41
|
-
list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
|
|
42
|
-
list(APPEND tuple_HEADERS "include/array_of_doubles_union.hpp;include/array_of_doubles_union_impl.hpp")
|
|
43
|
-
list(APPEND tuple_HEADERS "include/array_of_doubles_intersection.hpp;include/array_of_doubles_intersection_impl.hpp")
|
|
44
|
-
list(APPEND tuple_HEADERS "include/array_of_doubles_a_not_b.hpp;include/array_of_doubles_a_not_b_impl.hpp")
|
|
45
|
-
|
|
46
35
|
install(TARGETS tuple
|
|
47
36
|
EXPORT ${PROJECT_NAME}
|
|
48
37
|
)
|
|
49
38
|
|
|
50
|
-
install(FILES
|
|
39
|
+
install(FILES
|
|
40
|
+
include/tuple_sketch.hpp
|
|
41
|
+
include/tuple_sketch_impl.hpp
|
|
42
|
+
include/tuple_union.hpp
|
|
43
|
+
include/tuple_union_impl.hpp
|
|
44
|
+
include/tuple_intersection.hpp
|
|
45
|
+
include/tuple_intersection_impl.hpp
|
|
46
|
+
include/tuple_a_not_b.hpp
|
|
47
|
+
include/tuple_a_not_b_impl.hpp
|
|
48
|
+
include/tuple_jaccard_similarity.hpp
|
|
49
|
+
include/array_of_doubles_sketch.hpp
|
|
50
|
+
include/array_of_doubles_sketch_impl.hpp
|
|
51
|
+
include/array_of_doubles_union.hpp
|
|
52
|
+
include/array_of_doubles_union_impl.hpp
|
|
53
|
+
include/array_of_doubles_intersection.hpp
|
|
54
|
+
include/array_of_doubles_intersection_impl.hpp
|
|
55
|
+
include/array_of_doubles_a_not_b.hpp
|
|
56
|
+
include/array_of_doubles_a_not_b_impl.hpp
|
|
51
57
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
|
52
|
-
|
|
53
|
-
target_sources(tuple
|
|
54
|
-
INTERFACE
|
|
55
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch.hpp
|
|
56
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch_impl.hpp
|
|
57
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union.hpp
|
|
58
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union_impl.hpp
|
|
59
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection.hpp
|
|
60
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection_impl.hpp
|
|
61
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b.hpp
|
|
62
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b_impl.hpp
|
|
63
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_jaccard_similarity.hpp
|
|
64
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
|
|
65
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
|
|
66
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union.hpp
|
|
67
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union_impl.hpp
|
|
68
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection.hpp
|
|
69
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection_impl.hpp
|
|
70
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b.hpp
|
|
71
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b_impl.hpp
|
|
72
|
-
)
|