datasketches 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +7 -7
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
- data/vendor/datasketches-cpp/python/README.md +50 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
- metadata +18 -7
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -17,8 +17,10 @@
|
|
|
17
17
|
* under the License.
|
|
18
18
|
*/
|
|
19
19
|
|
|
20
|
+
#include <istream>
|
|
20
21
|
#include <fstream>
|
|
21
22
|
#include <sstream>
|
|
23
|
+
#include <vector>
|
|
22
24
|
|
|
23
25
|
#include <catch.hpp>
|
|
24
26
|
#include <theta_sketch.hpp>
|
|
@@ -39,6 +41,7 @@ TEST_CASE("theta sketch: empty", "[theta_sketch]") {
|
|
|
39
41
|
REQUIRE(update_sketch.get_estimate() == 0.0);
|
|
40
42
|
REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
|
|
41
43
|
REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
|
|
44
|
+
REQUIRE(update_sketch.is_ordered());
|
|
42
45
|
|
|
43
46
|
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
44
47
|
REQUIRE(compact_sketch.is_empty());
|
|
@@ -47,10 +50,14 @@ TEST_CASE("theta sketch: empty", "[theta_sketch]") {
|
|
|
47
50
|
REQUIRE(compact_sketch.get_estimate() == 0.0);
|
|
48
51
|
REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
|
|
49
52
|
REQUIRE(compact_sketch.get_upper_bound(1) == 0.0);
|
|
53
|
+
REQUIRE(compact_sketch.is_ordered());
|
|
54
|
+
|
|
55
|
+
// empty is forced to be ordered
|
|
56
|
+
REQUIRE(update_sketch.compact(false).is_ordered());
|
|
50
57
|
}
|
|
51
58
|
|
|
52
59
|
TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
|
|
53
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001).build();
|
|
60
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001f).build();
|
|
54
61
|
update_sketch.update(1);
|
|
55
62
|
//std::cerr << update_sketch.to_string();
|
|
56
63
|
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
@@ -67,6 +74,14 @@ TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
|
|
|
67
74
|
REQUIRE(compact_sketch.get_estimate() == 0.0);
|
|
68
75
|
REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
|
|
69
76
|
REQUIRE(compact_sketch.get_upper_bound(1) > 0);
|
|
77
|
+
|
|
78
|
+
update_sketch.reset();
|
|
79
|
+
REQUIRE(update_sketch.is_empty());
|
|
80
|
+
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
|
81
|
+
REQUIRE(update_sketch.get_theta() == 1.0);
|
|
82
|
+
REQUIRE(update_sketch.get_estimate() == 0.0);
|
|
83
|
+
REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
|
|
84
|
+
REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
|
|
70
85
|
}
|
|
71
86
|
|
|
72
87
|
TEST_CASE("theta sketch: single item", "[theta_sketch]") {
|
|
@@ -78,6 +93,7 @@ TEST_CASE("theta sketch: single item", "[theta_sketch]") {
|
|
|
78
93
|
REQUIRE(update_sketch.get_estimate() == 1.0);
|
|
79
94
|
REQUIRE(update_sketch.get_lower_bound(1) == 1.0);
|
|
80
95
|
REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
|
|
96
|
+
REQUIRE(update_sketch.is_ordered()); // one item is ordered
|
|
81
97
|
|
|
82
98
|
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
83
99
|
REQUIRE_FALSE(compact_sketch.is_empty());
|
|
@@ -86,6 +102,10 @@ TEST_CASE("theta sketch: single item", "[theta_sketch]") {
|
|
|
86
102
|
REQUIRE(compact_sketch.get_estimate() == 1.0);
|
|
87
103
|
REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
|
|
88
104
|
REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
|
|
105
|
+
REQUIRE(compact_sketch.is_ordered());
|
|
106
|
+
|
|
107
|
+
// single item is forced to be ordered
|
|
108
|
+
REQUIRE(update_sketch.compact(false).is_ordered());
|
|
89
109
|
}
|
|
90
110
|
|
|
91
111
|
TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
|
|
@@ -97,6 +117,7 @@ TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
|
|
|
97
117
|
REQUIRE(update_sketch.get_estimate() == 2000.0);
|
|
98
118
|
REQUIRE(update_sketch.get_lower_bound(1) == 2000.0);
|
|
99
119
|
REQUIRE(update_sketch.get_upper_bound(1) == 2000.0);
|
|
120
|
+
REQUIRE_FALSE(update_sketch.is_ordered());
|
|
100
121
|
|
|
101
122
|
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
102
123
|
REQUIRE_FALSE(compact_sketch.is_empty());
|
|
@@ -105,6 +126,17 @@ TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
|
|
|
105
126
|
REQUIRE(compact_sketch.get_estimate() == 2000.0);
|
|
106
127
|
REQUIRE(compact_sketch.get_lower_bound(1) == 2000.0);
|
|
107
128
|
REQUIRE(compact_sketch.get_upper_bound(1) == 2000.0);
|
|
129
|
+
REQUIRE(compact_sketch.is_ordered());
|
|
130
|
+
|
|
131
|
+
update_sketch.reset();
|
|
132
|
+
REQUIRE(update_sketch.is_empty());
|
|
133
|
+
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
|
134
|
+
REQUIRE(update_sketch.get_theta() == 1.0);
|
|
135
|
+
REQUIRE(update_sketch.get_estimate() == 0.0);
|
|
136
|
+
REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
|
|
137
|
+
REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
|
|
138
|
+
REQUIRE(update_sketch.is_ordered());
|
|
139
|
+
|
|
108
140
|
}
|
|
109
141
|
|
|
110
142
|
TEST_CASE("theta sketch: estimation", "[theta_sketch]") {
|
|
@@ -148,6 +180,34 @@ TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]")
|
|
|
148
180
|
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
149
181
|
}
|
|
150
182
|
|
|
183
|
+
TEST_CASE("theta sketch: deserialize compact v1 empty from java", "[theta_sketch]") {
|
|
184
|
+
std::ifstream is;
|
|
185
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
186
|
+
is.open(inputPath + "theta_compact_empty_from_java_v1.sk", std::ios::binary);
|
|
187
|
+
auto sketch = compact_theta_sketch::deserialize(is);
|
|
188
|
+
REQUIRE(sketch.is_empty());
|
|
189
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
190
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
191
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
192
|
+
REQUIRE(sketch.get_estimate() == 0.0);
|
|
193
|
+
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
194
|
+
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
TEST_CASE("theta sketch: deserialize compact v2 empty from java", "[theta_sketch]") {
|
|
198
|
+
std::ifstream is;
|
|
199
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
200
|
+
is.open(inputPath + "theta_compact_empty_from_java_v2.sk", std::ios::binary);
|
|
201
|
+
auto sketch = compact_theta_sketch::deserialize(is);
|
|
202
|
+
REQUIRE(sketch.is_empty());
|
|
203
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
204
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
205
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
206
|
+
REQUIRE(sketch.get_estimate() == 0.0);
|
|
207
|
+
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
208
|
+
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
209
|
+
}
|
|
210
|
+
|
|
151
211
|
TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
|
|
152
212
|
std::ifstream is;
|
|
153
213
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
@@ -162,6 +222,38 @@ TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
|
|
|
162
222
|
REQUIRE(sketch.get_upper_bound(1) == 1.0);
|
|
163
223
|
}
|
|
164
224
|
|
|
225
|
+
TEST_CASE("theta sketch: deserialize compact exact from java", "[theta_sketch]") {
|
|
226
|
+
std::ifstream is;
|
|
227
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
228
|
+
is.open(inputPath + "theta_compact_exact_from_java.sk", std::ios::binary);
|
|
229
|
+
auto sketch = compact_theta_sketch::deserialize(is);
|
|
230
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
231
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
232
|
+
REQUIRE(sketch.is_ordered());
|
|
233
|
+
REQUIRE(sketch.get_num_retained() == 100);
|
|
234
|
+
|
|
235
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
236
|
+
auto update_sketch = update_theta_sketch::builder().build();
|
|
237
|
+
const int n = 100;
|
|
238
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
239
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
240
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
241
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
242
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
243
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
244
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
245
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
246
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
247
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
248
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
249
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
250
|
+
auto iter = sketch.begin();
|
|
251
|
+
for (const auto& key: compact_sketch) {
|
|
252
|
+
REQUIRE(*iter == key);
|
|
253
|
+
++iter;
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
165
257
|
TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") {
|
|
166
258
|
std::ifstream is;
|
|
167
259
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
@@ -198,6 +290,78 @@ TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sket
|
|
|
198
290
|
}
|
|
199
291
|
}
|
|
200
292
|
|
|
293
|
+
TEST_CASE("theta sketch: deserialize compact v1 estimation from java", "[theta_sketch]") {
|
|
294
|
+
std::ifstream is;
|
|
295
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
296
|
+
is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary);
|
|
297
|
+
auto sketch = compact_theta_sketch::deserialize(is);
|
|
298
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
299
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
300
|
+
REQUIRE(sketch.is_ordered());
|
|
301
|
+
REQUIRE(sketch.get_num_retained() == 4342);
|
|
302
|
+
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
303
|
+
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
304
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
305
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
306
|
+
|
|
307
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
308
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
309
|
+
const int n = 8192;
|
|
310
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
311
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
312
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
313
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
314
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
315
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
316
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
317
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
318
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
319
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
320
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
321
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
322
|
+
auto iter = sketch.begin();
|
|
323
|
+
for (const auto& key: compact_sketch) {
|
|
324
|
+
REQUIRE(*iter == key);
|
|
325
|
+
++iter;
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
TEST_CASE("theta sketch: deserialize compact v2 estimation from java", "[theta_sketch]") {
|
|
330
|
+
std::ifstream is;
|
|
331
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
332
|
+
is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary);
|
|
333
|
+
auto sketch = compact_theta_sketch::deserialize(is);
|
|
334
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
335
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
336
|
+
REQUIRE(sketch.is_ordered());
|
|
337
|
+
REQUIRE(sketch.get_num_retained() == 4342);
|
|
338
|
+
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
339
|
+
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
340
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
341
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
342
|
+
|
|
343
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
344
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
345
|
+
const int n = 8192;
|
|
346
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
347
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
348
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
349
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
350
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
351
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
352
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
353
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
354
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
355
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
356
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
357
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
358
|
+
auto iter = sketch.begin();
|
|
359
|
+
for (const auto& key: compact_sketch) {
|
|
360
|
+
REQUIRE(*iter == key);
|
|
361
|
+
++iter;
|
|
362
|
+
}
|
|
363
|
+
}
|
|
364
|
+
|
|
201
365
|
TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[theta_sketch]") {
|
|
202
366
|
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
203
367
|
const int n = 8192;
|
|
@@ -238,4 +402,276 @@ TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[thet
|
|
|
238
402
|
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
|
239
403
|
}
|
|
240
404
|
|
|
405
|
+
TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sketch]") {
|
|
406
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
407
|
+
const int n = 8192;
|
|
408
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
409
|
+
|
|
410
|
+
// unordered
|
|
411
|
+
auto unordered_compact1 = update_sketch.compact(false);
|
|
412
|
+
compact_theta_sketch unordered_compact2(update_sketch, false);
|
|
413
|
+
auto it = unordered_compact1.begin();
|
|
414
|
+
for (auto entry: unordered_compact2) {
|
|
415
|
+
REQUIRE(*it == entry);
|
|
416
|
+
++it;
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
// ordered
|
|
420
|
+
auto ordered_compact1 = update_sketch.compact();
|
|
421
|
+
compact_theta_sketch ordered_compact2(update_sketch, true);
|
|
422
|
+
it = ordered_compact1.begin();
|
|
423
|
+
for (auto entry: ordered_compact2) {
|
|
424
|
+
REQUIRE(*it == entry);
|
|
425
|
+
++it;
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
// wrapped compact
|
|
429
|
+
auto bytes = ordered_compact1.serialize();
|
|
430
|
+
auto ordered_compact3 = wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size());
|
|
431
|
+
it = ordered_compact1.begin();
|
|
432
|
+
for (auto entry: ordered_compact3) {
|
|
433
|
+
REQUIRE(*it == entry);
|
|
434
|
+
++it;
|
|
435
|
+
}
|
|
436
|
+
REQUIRE(ordered_compact3.get_estimate() == ordered_compact1.get_estimate());
|
|
437
|
+
REQUIRE(ordered_compact3.get_lower_bound(1) == ordered_compact1.get_lower_bound(1));
|
|
438
|
+
REQUIRE(ordered_compact3.get_upper_bound(1) == ordered_compact1.get_upper_bound(1));
|
|
439
|
+
REQUIRE(ordered_compact3.is_estimation_mode() == ordered_compact1.is_estimation_mode());
|
|
440
|
+
REQUIRE(ordered_compact3.get_theta() == ordered_compact1.get_theta());
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
// seed mismatch
|
|
444
|
+
REQUIRE_THROWS_AS(wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size(), 0), std::invalid_argument);
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
TEST_CASE("theta sketch: wrap compact empty from java", "[theta_sketch]") {
|
|
448
|
+
std::ifstream is;
|
|
449
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
450
|
+
is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary | std::ios::ate);
|
|
451
|
+
|
|
452
|
+
std::vector<uint8_t> buf;
|
|
453
|
+
if(is) {
|
|
454
|
+
auto size = is.tellg();
|
|
455
|
+
buf.reserve(size);
|
|
456
|
+
buf.assign(size, 0);
|
|
457
|
+
is.seekg(0, std::ios_base::beg);
|
|
458
|
+
is.read((char*)(buf.data()), buf.size());
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
462
|
+
REQUIRE(sketch.is_empty());
|
|
463
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
464
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
465
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
466
|
+
REQUIRE(sketch.get_estimate() == 0.0);
|
|
467
|
+
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
468
|
+
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
TEST_CASE("theta sketch: wrap compact v1 empty from java", "[theta_sketch]") {
|
|
472
|
+
std::ifstream is;
|
|
473
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
474
|
+
is.open(inputPath + "theta_compact_empty_from_java_v1.sk", std::ios::binary | std::ios::ate);
|
|
475
|
+
|
|
476
|
+
std::vector<uint8_t> buf;
|
|
477
|
+
if(is) {
|
|
478
|
+
auto size = is.tellg();
|
|
479
|
+
buf.reserve(size);
|
|
480
|
+
buf.assign(size, 0);
|
|
481
|
+
is.seekg(0, std::ios_base::beg);
|
|
482
|
+
is.read((char*)(buf.data()), buf.size());
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
486
|
+
REQUIRE(sketch.is_empty());
|
|
487
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
488
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
489
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
490
|
+
REQUIRE(sketch.get_estimate() == 0.0);
|
|
491
|
+
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
492
|
+
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
TEST_CASE("theta sketch: wrap compact v2 empty from java", "[theta_sketch]") {
|
|
496
|
+
std::ifstream is;
|
|
497
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
498
|
+
is.open(inputPath + "theta_compact_empty_from_java_v2.sk", std::ios::binary | std::ios::ate);
|
|
499
|
+
|
|
500
|
+
std::vector<uint8_t> buf;
|
|
501
|
+
if(is) {
|
|
502
|
+
auto size = is.tellg();
|
|
503
|
+
buf.reserve(size);
|
|
504
|
+
buf.assign(size, 0);
|
|
505
|
+
is.seekg(0, std::ios_base::beg);
|
|
506
|
+
is.read((char*)(buf.data()), buf.size());
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
510
|
+
REQUIRE(sketch.is_empty());
|
|
511
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
512
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
513
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
514
|
+
REQUIRE(sketch.get_estimate() == 0.0);
|
|
515
|
+
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
516
|
+
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
TEST_CASE("theta sketch: wrap single item from java", "[theta_sketch]") {
|
|
520
|
+
std::ifstream is;
|
|
521
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
522
|
+
is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary | std::ios::ate);
|
|
523
|
+
std::vector<uint8_t> buf;
|
|
524
|
+
if(is) {
|
|
525
|
+
auto size = is.tellg();
|
|
526
|
+
buf.reserve(size);
|
|
527
|
+
buf.assign(size, 0);
|
|
528
|
+
is.seekg(0, std::ios_base::beg);
|
|
529
|
+
is.read((char*)(buf.data()), buf.size());
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
533
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
534
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
535
|
+
REQUIRE(sketch.get_num_retained() == 1);
|
|
536
|
+
REQUIRE(sketch.get_theta() == 1.0);
|
|
537
|
+
REQUIRE(sketch.get_estimate() == 1.0);
|
|
538
|
+
REQUIRE(sketch.get_lower_bound(1) == 1.0);
|
|
539
|
+
REQUIRE(sketch.get_upper_bound(1) == 1.0);
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
|
|
543
|
+
std::ifstream is;
|
|
544
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
545
|
+
is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary | std::ios::ate);
|
|
546
|
+
std::vector<uint8_t> buf;
|
|
547
|
+
if(is) {
|
|
548
|
+
auto size = is.tellg();
|
|
549
|
+
buf.reserve(size);
|
|
550
|
+
buf.assign(size, 0);
|
|
551
|
+
is.seekg(0, std::ios_base::beg);
|
|
552
|
+
is.read((char*)(buf.data()), buf.size());
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
556
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
557
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
558
|
+
REQUIRE(sketch.is_ordered());
|
|
559
|
+
REQUIRE(sketch.get_num_retained() == 4342);
|
|
560
|
+
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
561
|
+
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
562
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
563
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
564
|
+
|
|
565
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
566
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
567
|
+
const int n = 8192;
|
|
568
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
569
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
570
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
571
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
572
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
573
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
574
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
575
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
576
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
577
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
578
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
579
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
580
|
+
auto iter = sketch.begin();
|
|
581
|
+
for (const auto& key: compact_sketch) {
|
|
582
|
+
REQUIRE(*iter == key);
|
|
583
|
+
++iter;
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]") {
|
|
588
|
+
std::ifstream is;
|
|
589
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
590
|
+
is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary | std::ios::ate);
|
|
591
|
+
std::vector<uint8_t> buf;
|
|
592
|
+
if(is) {
|
|
593
|
+
auto size = is.tellg();
|
|
594
|
+
buf.reserve(size);
|
|
595
|
+
buf.assign(size, 0);
|
|
596
|
+
is.seekg(0, std::ios_base::beg);
|
|
597
|
+
is.read((char*)(buf.data()), buf.size());
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
601
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
602
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
603
|
+
// REQUIRE(sketch.is_ordered()); // v1 may not be ordered
|
|
604
|
+
REQUIRE(sketch.get_num_retained() == 4342);
|
|
605
|
+
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
606
|
+
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
607
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
608
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
609
|
+
|
|
610
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
611
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
612
|
+
const int n = 8192;
|
|
613
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
614
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
615
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
616
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
617
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
618
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
619
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
620
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
621
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
622
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
623
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
624
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
625
|
+
auto iter = sketch.begin();
|
|
626
|
+
for (const auto& key: compact_sketch) {
|
|
627
|
+
REQUIRE(*iter == key);
|
|
628
|
+
++iter;
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]") {
|
|
633
|
+
std::ifstream is;
|
|
634
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
635
|
+
is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary | std::ios::ate);
|
|
636
|
+
std::vector<uint8_t> buf;
|
|
637
|
+
if(is) {
|
|
638
|
+
auto size = is.tellg();
|
|
639
|
+
buf.reserve(size);
|
|
640
|
+
buf.assign(size, 0);
|
|
641
|
+
is.seekg(0, std::ios_base::beg);
|
|
642
|
+
is.read((char*)(buf.data()), buf.size());
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
646
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
647
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
648
|
+
// REQUIRE(sketch.is_ordered()); // v1 may not be ordered
|
|
649
|
+
REQUIRE(sketch.get_num_retained() == 4342);
|
|
650
|
+
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
651
|
+
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
652
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
653
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
654
|
+
|
|
655
|
+
// the same construction process in Java must have produced exactly the same sketch
|
|
656
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
657
|
+
const int n = 8192;
|
|
658
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
659
|
+
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
660
|
+
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
661
|
+
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
662
|
+
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
663
|
+
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
664
|
+
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
665
|
+
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
666
|
+
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
667
|
+
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
668
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
669
|
+
// the sketches are ordered, so the iteration sequence must match exactly
|
|
670
|
+
auto iter = sketch.begin();
|
|
671
|
+
for (const auto& key: compact_sketch) {
|
|
672
|
+
REQUIRE(*iter == key);
|
|
673
|
+
++iter;
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
|
|
241
677
|
} /* namespace datasketches */
|
|
@@ -39,7 +39,7 @@ TEST_CASE("theta union: empty", "[theta_union]") {
|
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
TEST_CASE("theta union: non empty no retained keys", "[theta_union]") {
|
|
42
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.
|
|
42
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001f).build();
|
|
43
43
|
update_sketch.update(1);
|
|
44
44
|
theta_union u = theta_union::builder().build();
|
|
45
45
|
u.update(update_sketch);
|
|
@@ -51,40 +51,72 @@ TEST_CASE("theta union: non empty no retained keys", "[theta_union]") {
|
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
TEST_CASE("theta union: exact mode half overlap", "[theta_union]") {
|
|
54
|
-
|
|
54
|
+
auto sketch1 = update_theta_sketch::builder().build();
|
|
55
55
|
int value = 0;
|
|
56
56
|
for (int i = 0; i < 1000; i++) sketch1.update(value++);
|
|
57
57
|
|
|
58
|
-
|
|
58
|
+
auto sketch2 = update_theta_sketch::builder().build();
|
|
59
59
|
value = 500;
|
|
60
60
|
for (int i = 0; i < 1000; i++) sketch2.update(value++);
|
|
61
61
|
|
|
62
|
-
|
|
62
|
+
auto u = theta_union::builder().build();
|
|
63
63
|
u.update(sketch1);
|
|
64
64
|
u.update(sketch2);
|
|
65
|
+
auto sketch3 = u.get_result();
|
|
66
|
+
REQUIRE_FALSE(sketch3.is_empty());
|
|
67
|
+
REQUIRE_FALSE(sketch3.is_estimation_mode());
|
|
68
|
+
REQUIRE(sketch3.get_estimate() == 1500.0);
|
|
69
|
+
|
|
70
|
+
u.reset();
|
|
71
|
+
sketch3 = u.get_result();
|
|
72
|
+
REQUIRE(sketch3.get_num_retained() == 0);
|
|
73
|
+
REQUIRE(sketch3.is_empty());
|
|
74
|
+
REQUIRE_FALSE(sketch3.is_estimation_mode());
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
TEST_CASE("theta union: exact mode half overlap wrapped compact", "[theta_union]") {
|
|
78
|
+
auto sketch1 = update_theta_sketch::builder().build();
|
|
79
|
+
int value = 0;
|
|
80
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++);
|
|
81
|
+
auto bytes1 = sketch1.compact().serialize();
|
|
82
|
+
|
|
83
|
+
auto sketch2 = update_theta_sketch::builder().build();
|
|
84
|
+
value = 500;
|
|
85
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++);
|
|
86
|
+
auto bytes2 = sketch2.compact().serialize();
|
|
87
|
+
|
|
88
|
+
auto u = theta_union::builder().build();
|
|
89
|
+
u.update(wrapped_compact_theta_sketch::wrap(bytes1.data(), bytes1.size()));
|
|
90
|
+
u.update(wrapped_compact_theta_sketch::wrap(bytes2.data(), bytes2.size()));
|
|
65
91
|
compact_theta_sketch sketch3 = u.get_result();
|
|
66
92
|
REQUIRE_FALSE(sketch3.is_empty());
|
|
67
93
|
REQUIRE_FALSE(sketch3.is_estimation_mode());
|
|
68
|
-
REQUIRE(sketch3.get_estimate() ==
|
|
94
|
+
REQUIRE(sketch3.get_estimate() == 1500.0);
|
|
69
95
|
}
|
|
70
96
|
|
|
71
97
|
TEST_CASE("theta union: estimation mode half overlap", "[theta_union]") {
|
|
72
|
-
|
|
98
|
+
auto sketch1 = update_theta_sketch::builder().build();
|
|
73
99
|
int value = 0;
|
|
74
100
|
for (int i = 0; i < 10000; i++) sketch1.update(value++);
|
|
75
101
|
|
|
76
|
-
|
|
102
|
+
auto sketch2 = update_theta_sketch::builder().build();
|
|
77
103
|
value = 5000;
|
|
78
104
|
for (int i = 0; i < 10000; i++) sketch2.update(value++);
|
|
79
105
|
|
|
80
|
-
|
|
106
|
+
auto u = theta_union::builder().build();
|
|
81
107
|
u.update(sketch1);
|
|
82
108
|
u.update(sketch2);
|
|
83
|
-
|
|
109
|
+
auto sketch3 = u.get_result();
|
|
84
110
|
REQUIRE_FALSE(sketch3.is_empty());
|
|
85
111
|
REQUIRE(sketch3.is_estimation_mode());
|
|
86
112
|
REQUIRE(sketch3.get_estimate() == Approx(15000).margin(15000 * 0.01));
|
|
87
113
|
//std::cerr << sketch3.to_string(true);
|
|
114
|
+
|
|
115
|
+
u.reset();
|
|
116
|
+
sketch3 = u.get_result();
|
|
117
|
+
REQUIRE(sketch3.get_num_retained() == 0);
|
|
118
|
+
REQUIRE(sketch3.is_empty());
|
|
119
|
+
REQUIRE_FALSE(sketch3.is_estimation_mode());
|
|
88
120
|
}
|
|
89
121
|
|
|
90
122
|
TEST_CASE("theta union: seed mismatch", "[theta_union]") {
|
|
@@ -32,41 +32,26 @@ target_include_directories(tuple
|
|
|
32
32
|
target_link_libraries(tuple INTERFACE common theta)
|
|
33
33
|
target_compile_features(tuple INTERFACE cxx_std_11)
|
|
34
34
|
|
|
35
|
-
set(tuple_HEADERS "")
|
|
36
|
-
list(APPEND tuple_HEADERS "include/tuple_sketch.hpp;include/tuple_sketch_impl.hpp")
|
|
37
|
-
list(APPEND tuple_HEADERS "include/tuple_union.hpp;include/tuple_union_impl.hpp")
|
|
38
|
-
list(APPEND tuple_HEADERS "include/tuple_intersection.hpp;include/tuple_intersection_impl.hpp")
|
|
39
|
-
list(APPEND tuple_HEADERS "include/tuple_a_not_b.hpp;include/tuple_a_not_b_impl.hpp")
|
|
40
|
-
list(APPEND tuple_HEADERS "include/tuple_jaccard_similarity.hpp")
|
|
41
|
-
list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
|
|
42
|
-
list(APPEND tuple_HEADERS "include/array_of_doubles_union.hpp;include/array_of_doubles_union_impl.hpp")
|
|
43
|
-
list(APPEND tuple_HEADERS "include/array_of_doubles_intersection.hpp;include/array_of_doubles_intersection_impl.hpp")
|
|
44
|
-
list(APPEND tuple_HEADERS "include/array_of_doubles_a_not_b.hpp;include/array_of_doubles_a_not_b_impl.hpp")
|
|
45
|
-
|
|
46
35
|
install(TARGETS tuple
|
|
47
36
|
EXPORT ${PROJECT_NAME}
|
|
48
37
|
)
|
|
49
38
|
|
|
50
|
-
install(FILES
|
|
39
|
+
install(FILES
|
|
40
|
+
include/tuple_sketch.hpp
|
|
41
|
+
include/tuple_sketch_impl.hpp
|
|
42
|
+
include/tuple_union.hpp
|
|
43
|
+
include/tuple_union_impl.hpp
|
|
44
|
+
include/tuple_intersection.hpp
|
|
45
|
+
include/tuple_intersection_impl.hpp
|
|
46
|
+
include/tuple_a_not_b.hpp
|
|
47
|
+
include/tuple_a_not_b_impl.hpp
|
|
48
|
+
include/tuple_jaccard_similarity.hpp
|
|
49
|
+
include/array_of_doubles_sketch.hpp
|
|
50
|
+
include/array_of_doubles_sketch_impl.hpp
|
|
51
|
+
include/array_of_doubles_union.hpp
|
|
52
|
+
include/array_of_doubles_union_impl.hpp
|
|
53
|
+
include/array_of_doubles_intersection.hpp
|
|
54
|
+
include/array_of_doubles_intersection_impl.hpp
|
|
55
|
+
include/array_of_doubles_a_not_b.hpp
|
|
56
|
+
include/array_of_doubles_a_not_b_impl.hpp
|
|
51
57
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
|
52
|
-
|
|
53
|
-
target_sources(tuple
|
|
54
|
-
INTERFACE
|
|
55
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch.hpp
|
|
56
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch_impl.hpp
|
|
57
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union.hpp
|
|
58
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union_impl.hpp
|
|
59
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection.hpp
|
|
60
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection_impl.hpp
|
|
61
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b.hpp
|
|
62
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b_impl.hpp
|
|
63
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_jaccard_similarity.hpp
|
|
64
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
|
|
65
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
|
|
66
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union.hpp
|
|
67
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union_impl.hpp
|
|
68
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection.hpp
|
|
69
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection_impl.hpp
|
|
70
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b.hpp
|
|
71
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b_impl.hpp
|
|
72
|
-
)
|
|
@@ -122,7 +122,7 @@ public:
|
|
|
122
122
|
|
|
123
123
|
private:
|
|
124
124
|
// for builder
|
|
125
|
-
update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
|
|
125
|
+
update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta,
|
|
126
126
|
uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator);
|
|
127
127
|
};
|
|
128
128
|
|