datasketches 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
- data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
- data/vendor/datasketches-cpp/python/README.md +6 -3
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
- data/vendor/datasketches-cpp/setup.py +5 -3
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
- metadata +43 -34
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
|
@@ -1,247 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#include <fstream>
|
|
21
|
-
#include <sstream>
|
|
22
|
-
|
|
23
|
-
#include <catch.hpp>
|
|
24
|
-
#include <theta_sketch_experimental.hpp>
|
|
25
|
-
|
|
26
|
-
namespace datasketches {
|
|
27
|
-
|
|
28
|
-
#ifdef TEST_BINARY_INPUT_PATH
|
|
29
|
-
const std::string inputPath = TEST_BINARY_INPUT_PATH;
|
|
30
|
-
#else
|
|
31
|
-
const std::string inputPath = "test/";
|
|
32
|
-
#endif
|
|
33
|
-
|
|
34
|
-
// These tests have been copied from the existing theta sketch implementation.
|
|
35
|
-
// Serialization as base class and serialization of update sketch have been removed.
|
|
36
|
-
|
|
37
|
-
using update_theta_sketch = update_theta_sketch_experimental<>;
|
|
38
|
-
using compact_theta_sketch = compact_theta_sketch_experimental<>;
|
|
39
|
-
|
|
40
|
-
TEST_CASE("theta sketch: empty", "[theta_sketch]") {
|
|
41
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
42
|
-
REQUIRE(update_sketch.is_empty());
|
|
43
|
-
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
|
44
|
-
REQUIRE(update_sketch.get_theta() == 1.0);
|
|
45
|
-
REQUIRE(update_sketch.get_estimate() == 0.0);
|
|
46
|
-
REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
|
|
47
|
-
REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
|
|
48
|
-
|
|
49
|
-
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
50
|
-
REQUIRE(compact_sketch.is_empty());
|
|
51
|
-
REQUIRE_FALSE(compact_sketch.is_estimation_mode());
|
|
52
|
-
REQUIRE(compact_sketch.get_theta() == 1.0);
|
|
53
|
-
REQUIRE(compact_sketch.get_estimate() == 0.0);
|
|
54
|
-
REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
|
|
55
|
-
REQUIRE(compact_sketch.get_upper_bound(1) == 0.0);
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
|
|
59
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001).build();
|
|
60
|
-
update_sketch.update(1);
|
|
61
|
-
//std::cerr << update_sketch.to_string();
|
|
62
|
-
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
63
|
-
REQUIRE_FALSE(update_sketch.is_empty());
|
|
64
|
-
REQUIRE(update_sketch.is_estimation_mode());
|
|
65
|
-
REQUIRE(update_sketch.get_estimate() == 0.0);
|
|
66
|
-
REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
|
|
67
|
-
REQUIRE(update_sketch.get_upper_bound(1) > 0);
|
|
68
|
-
|
|
69
|
-
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
70
|
-
REQUIRE(compact_sketch.get_num_retained() == 0);
|
|
71
|
-
REQUIRE_FALSE(compact_sketch.is_empty());
|
|
72
|
-
REQUIRE(compact_sketch.is_estimation_mode());
|
|
73
|
-
REQUIRE(compact_sketch.get_estimate() == 0.0);
|
|
74
|
-
REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
|
|
75
|
-
REQUIRE(compact_sketch.get_upper_bound(1) > 0);
|
|
76
|
-
}
|
|
77
|
-
|
|
78
|
-
TEST_CASE("theta sketch: single item", "[theta_sketch]") {
|
|
79
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
80
|
-
update_sketch.update(1);
|
|
81
|
-
REQUIRE_FALSE(update_sketch.is_empty());
|
|
82
|
-
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
|
83
|
-
REQUIRE(update_sketch.get_theta() == 1.0);
|
|
84
|
-
REQUIRE(update_sketch.get_estimate() == 1.0);
|
|
85
|
-
REQUIRE(update_sketch.get_lower_bound(1) == 1.0);
|
|
86
|
-
REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
|
|
87
|
-
|
|
88
|
-
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
89
|
-
REQUIRE_FALSE(compact_sketch.is_empty());
|
|
90
|
-
REQUIRE_FALSE(compact_sketch.is_estimation_mode());
|
|
91
|
-
REQUIRE(compact_sketch.get_theta() == 1.0);
|
|
92
|
-
REQUIRE(compact_sketch.get_estimate() == 1.0);
|
|
93
|
-
REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
|
|
94
|
-
REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
|
|
98
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
99
|
-
for (int i = 0; i < 2000; i++) update_sketch.update(i);
|
|
100
|
-
REQUIRE_FALSE(update_sketch.is_empty());
|
|
101
|
-
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
|
102
|
-
REQUIRE(update_sketch.get_theta() == 1.0);
|
|
103
|
-
REQUIRE(update_sketch.get_estimate() == 2000.0);
|
|
104
|
-
REQUIRE(update_sketch.get_lower_bound(1) == 2000.0);
|
|
105
|
-
REQUIRE(update_sketch.get_upper_bound(1) == 2000.0);
|
|
106
|
-
|
|
107
|
-
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
108
|
-
REQUIRE_FALSE(compact_sketch.is_empty());
|
|
109
|
-
REQUIRE_FALSE(compact_sketch.is_estimation_mode());
|
|
110
|
-
REQUIRE(compact_sketch.get_theta() == 1.0);
|
|
111
|
-
REQUIRE(compact_sketch.get_estimate() == 2000.0);
|
|
112
|
-
REQUIRE(compact_sketch.get_lower_bound(1) == 2000.0);
|
|
113
|
-
REQUIRE(compact_sketch.get_upper_bound(1) == 2000.0);
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
TEST_CASE("theta sketch: estimation", "[theta_sketch]") {
|
|
117
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().set_resize_factor(update_theta_sketch::resize_factor::X1).build();
|
|
118
|
-
const int n = 8000;
|
|
119
|
-
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
120
|
-
//std::cerr << update_sketch.to_string();
|
|
121
|
-
REQUIRE_FALSE(update_sketch.is_empty());
|
|
122
|
-
REQUIRE(update_sketch.is_estimation_mode());
|
|
123
|
-
REQUIRE(update_sketch.get_theta() < 1.0);
|
|
124
|
-
REQUIRE(update_sketch.get_estimate() == Approx((double) n).margin(n * 0.01));
|
|
125
|
-
REQUIRE(update_sketch.get_lower_bound(1) < n);
|
|
126
|
-
REQUIRE(update_sketch.get_upper_bound(1) > n);
|
|
127
|
-
|
|
128
|
-
const uint32_t k = 1 << update_theta_sketch::builder::DEFAULT_LG_K;
|
|
129
|
-
REQUIRE(update_sketch.get_num_retained() >= k);
|
|
130
|
-
update_sketch.trim();
|
|
131
|
-
REQUIRE(update_sketch.get_num_retained() == k);
|
|
132
|
-
|
|
133
|
-
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
134
|
-
REQUIRE_FALSE(compact_sketch.is_empty());
|
|
135
|
-
REQUIRE(compact_sketch.is_ordered());
|
|
136
|
-
REQUIRE(compact_sketch.is_estimation_mode());
|
|
137
|
-
REQUIRE(compact_sketch.get_theta() < 1.0);
|
|
138
|
-
REQUIRE(compact_sketch.get_estimate() == Approx((double) n).margin(n * 0.01));
|
|
139
|
-
REQUIRE(compact_sketch.get_lower_bound(1) < n);
|
|
140
|
-
REQUIRE(compact_sketch.get_upper_bound(1) > n);
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]") {
|
|
144
|
-
std::ifstream is;
|
|
145
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
146
|
-
is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary);
|
|
147
|
-
auto sketch = compact_theta_sketch::deserialize(is);
|
|
148
|
-
REQUIRE(sketch.is_empty());
|
|
149
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
150
|
-
REQUIRE(sketch.get_num_retained() == 0);
|
|
151
|
-
REQUIRE(sketch.get_theta() == 1.0);
|
|
152
|
-
REQUIRE(sketch.get_estimate() == 0.0);
|
|
153
|
-
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
154
|
-
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
|
|
158
|
-
std::ifstream is;
|
|
159
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
160
|
-
is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary);
|
|
161
|
-
auto sketch = compact_theta_sketch::deserialize(is);
|
|
162
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
163
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
164
|
-
REQUIRE(sketch.get_num_retained() == 1);
|
|
165
|
-
REQUIRE(sketch.get_theta() == 1.0);
|
|
166
|
-
REQUIRE(sketch.get_estimate() == 1.0);
|
|
167
|
-
REQUIRE(sketch.get_lower_bound(1) == 1.0);
|
|
168
|
-
REQUIRE(sketch.get_upper_bound(1) == 1.0);
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") {
|
|
172
|
-
std::ifstream is;
|
|
173
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
174
|
-
is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary);
|
|
175
|
-
auto sketch = compact_theta_sketch::deserialize(is);
|
|
176
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
177
|
-
REQUIRE(sketch.is_estimation_mode());
|
|
178
|
-
REQUIRE(sketch.is_ordered());
|
|
179
|
-
REQUIRE(sketch.get_num_retained() == 4342);
|
|
180
|
-
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
181
|
-
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
182
|
-
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
183
|
-
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
184
|
-
|
|
185
|
-
// the same construction process in Java must have produced exactly the same sketch
|
|
186
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
187
|
-
const int n = 8192;
|
|
188
|
-
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
189
|
-
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
190
|
-
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
191
|
-
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
192
|
-
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
193
|
-
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
194
|
-
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
195
|
-
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
196
|
-
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
197
|
-
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
198
|
-
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
199
|
-
// the sketches are ordered, so the iteration sequence must match exactly
|
|
200
|
-
auto iter = sketch.begin();
|
|
201
|
-
for (const auto& key: compact_sketch) {
|
|
202
|
-
REQUIRE(*iter == key);
|
|
203
|
-
++iter;
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[theta_sketch]") {
|
|
208
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
209
|
-
const int n = 8192;
|
|
210
|
-
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
211
|
-
|
|
212
|
-
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
213
|
-
update_sketch.compact().serialize(s);
|
|
214
|
-
auto bytes = update_sketch.compact().serialize();
|
|
215
|
-
REQUIRE(bytes.size() == static_cast<size_t>(s.tellp()));
|
|
216
|
-
for (size_t i = 0; i < bytes.size(); ++i) {
|
|
217
|
-
REQUIRE(((char*)bytes.data())[i] == (char)s.get());
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
s.seekg(0); // rewind
|
|
221
|
-
compact_theta_sketch deserialized_sketch1 = compact_theta_sketch::deserialize(s);
|
|
222
|
-
compact_theta_sketch deserialized_sketch2 = compact_theta_sketch::deserialize(bytes.data(), bytes.size());
|
|
223
|
-
REQUIRE(bytes.size() == static_cast<size_t>(s.tellg()));
|
|
224
|
-
REQUIRE(deserialized_sketch2.is_empty() == deserialized_sketch1.is_empty());
|
|
225
|
-
REQUIRE(deserialized_sketch2.is_ordered() == deserialized_sketch1.is_ordered());
|
|
226
|
-
REQUIRE(deserialized_sketch2.get_num_retained() == deserialized_sketch1.get_num_retained());
|
|
227
|
-
REQUIRE(deserialized_sketch2.get_theta() == deserialized_sketch1.get_theta());
|
|
228
|
-
REQUIRE(deserialized_sketch2.get_estimate() == deserialized_sketch1.get_estimate());
|
|
229
|
-
REQUIRE(deserialized_sketch2.get_lower_bound(1) == deserialized_sketch1.get_lower_bound(1));
|
|
230
|
-
REQUIRE(deserialized_sketch2.get_upper_bound(1) == deserialized_sketch1.get_upper_bound(1));
|
|
231
|
-
// the sketches are ordered, so the iteration sequence must match exactly
|
|
232
|
-
auto iter = deserialized_sketch1.begin();
|
|
233
|
-
for (auto key: deserialized_sketch2) {
|
|
234
|
-
REQUIRE(*iter == key);
|
|
235
|
-
++iter;
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[theta_sketch]") {
|
|
240
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
241
|
-
update_sketch.update(1);
|
|
242
|
-
auto bytes = update_sketch.compact().serialize();
|
|
243
|
-
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
|
|
244
|
-
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
} /* namespace datasketches */
|