datasketches 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +7 -7
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
- data/vendor/datasketches-cpp/python/README.md +50 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
- metadata +18 -7
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -55,15 +55,15 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
|
|
|
55
55
|
|
|
56
56
|
TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
|
|
57
57
|
req_sketch<float> sketch(12, false);
|
|
58
|
-
sketch.update(1);
|
|
58
|
+
sketch.update(1.0f);
|
|
59
59
|
REQUIRE_FALSE(sketch.is_HRA());
|
|
60
60
|
REQUIRE_FALSE(sketch.is_empty());
|
|
61
61
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
62
62
|
REQUIRE(sketch.get_n() == 1);
|
|
63
63
|
REQUIRE(sketch.get_num_retained() == 1);
|
|
64
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
|
65
|
-
REQUIRE(sketch.get_rank<true>(1) == 1);
|
|
66
|
-
REQUIRE(sketch.get_rank(1.
|
|
64
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
65
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 1);
|
|
66
|
+
REQUIRE(sketch.get_rank(1.1f) == 1);
|
|
67
67
|
REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
|
|
68
68
|
REQUIRE(sketch.get_quantile(0) == 1);
|
|
69
69
|
REQUIRE(sketch.get_quantile(0.5) == 1);
|
|
@@ -86,43 +86,43 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
|
|
|
86
86
|
|
|
87
87
|
TEST_CASE("req sketch: repeated values", "[req_sketch]") {
|
|
88
88
|
req_sketch<float> sketch(12);
|
|
89
|
-
sketch.update(1);
|
|
90
|
-
sketch.update(1);
|
|
91
|
-
sketch.update(1);
|
|
92
|
-
sketch.update(2);
|
|
93
|
-
sketch.update(2);
|
|
94
|
-
sketch.update(2);
|
|
89
|
+
sketch.update(1.0f);
|
|
90
|
+
sketch.update(1.0f);
|
|
91
|
+
sketch.update(1.0f);
|
|
92
|
+
sketch.update(2.0f);
|
|
93
|
+
sketch.update(2.0f);
|
|
94
|
+
sketch.update(2.0f);
|
|
95
95
|
REQUIRE_FALSE(sketch.is_empty());
|
|
96
96
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
97
97
|
REQUIRE(sketch.get_n() == 6);
|
|
98
98
|
REQUIRE(sketch.get_num_retained() == 6);
|
|
99
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
|
100
|
-
REQUIRE(sketch.get_rank<true>(1) == 0.5);
|
|
101
|
-
REQUIRE(sketch.get_rank(2) == 0.5);
|
|
102
|
-
REQUIRE(sketch.get_rank<true>(2) == 1);
|
|
99
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
100
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 0.5);
|
|
101
|
+
REQUIRE(sketch.get_rank(2.0f) == 0.5);
|
|
102
|
+
REQUIRE(sketch.get_rank<true>(2.0f) == 1);
|
|
103
103
|
}
|
|
104
104
|
|
|
105
105
|
TEST_CASE("req sketch: exact mode", "[req_sketch]") {
|
|
106
106
|
req_sketch<float> sketch(12);
|
|
107
|
-
for (size_t i = 1; i <= 10; ++i) sketch.update(i);
|
|
107
|
+
for (size_t i = 1; i <= 10; ++i) sketch.update(static_cast<float>(i));
|
|
108
108
|
REQUIRE_FALSE(sketch.is_empty());
|
|
109
109
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
110
110
|
REQUIRE(sketch.get_n() == 10);
|
|
111
111
|
REQUIRE(sketch.get_num_retained() == 10);
|
|
112
112
|
|
|
113
113
|
// like KLL
|
|
114
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
|
115
|
-
REQUIRE(sketch.get_rank(2) == 0.1);
|
|
116
|
-
REQUIRE(sketch.get_rank(6) == 0.5);
|
|
117
|
-
REQUIRE(sketch.get_rank(9) == 0.8);
|
|
118
|
-
REQUIRE(sketch.get_rank(10) == 0.9);
|
|
114
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
115
|
+
REQUIRE(sketch.get_rank(2.0f) == 0.1);
|
|
116
|
+
REQUIRE(sketch.get_rank(6.0f) == 0.5);
|
|
117
|
+
REQUIRE(sketch.get_rank(9.0f) == 0.8);
|
|
118
|
+
REQUIRE(sketch.get_rank(10.0f) == 0.9);
|
|
119
119
|
|
|
120
120
|
// inclusive
|
|
121
|
-
REQUIRE(sketch.get_rank<true>(1) == 0.1);
|
|
122
|
-
REQUIRE(sketch.get_rank<true>(2) == 0.2);
|
|
123
|
-
REQUIRE(sketch.get_rank<true>(5) == 0.5);
|
|
124
|
-
REQUIRE(sketch.get_rank<true>(9) == 0.9);
|
|
125
|
-
REQUIRE(sketch.get_rank<true>(10) == 1);
|
|
121
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 0.1);
|
|
122
|
+
REQUIRE(sketch.get_rank<true>(2.0f) == 0.2);
|
|
123
|
+
REQUIRE(sketch.get_rank<true>(5.0f) == 0.5);
|
|
124
|
+
REQUIRE(sketch.get_rank<true>(9.0f) == 0.9);
|
|
125
|
+
REQUIRE(sketch.get_rank<true>(10.0f) == 1);
|
|
126
126
|
|
|
127
127
|
// like KLL
|
|
128
128
|
REQUIRE(sketch.get_quantile(0) == 1);
|
|
@@ -164,16 +164,16 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
|
|
|
164
164
|
TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
|
|
165
165
|
req_sketch<float> sketch(12);
|
|
166
166
|
const size_t n = 100000;
|
|
167
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
167
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
168
168
|
REQUIRE_FALSE(sketch.is_empty());
|
|
169
169
|
REQUIRE(sketch.is_estimation_mode());
|
|
170
170
|
REQUIRE(sketch.get_n() == n);
|
|
171
171
|
// std::cout << sketch.to_string(true);
|
|
172
172
|
REQUIRE(sketch.get_num_retained() < n);
|
|
173
173
|
REQUIRE(sketch.get_rank(0) == 0);
|
|
174
|
-
REQUIRE(sketch.get_rank(n) == 1);
|
|
175
|
-
REQUIRE(sketch.get_rank(n / 2) == Approx(0.5).margin(0.01));
|
|
176
|
-
REQUIRE(sketch.get_rank(n - 1) == Approx(1).margin(0.01));
|
|
174
|
+
REQUIRE(sketch.get_rank(static_cast<float>(n)) == 1);
|
|
175
|
+
REQUIRE(sketch.get_rank(n / 2.0f) == Approx(0.5).margin(0.01));
|
|
176
|
+
REQUIRE(sketch.get_rank(n - 1.0f) == Approx(1).margin(0.01));
|
|
177
177
|
REQUIRE(sketch.get_min_value() == 0);
|
|
178
178
|
REQUIRE(sketch.get_max_value() == n - 1);
|
|
179
179
|
REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
|
|
@@ -219,7 +219,7 @@ TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
|
|
|
219
219
|
|
|
220
220
|
TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
|
|
221
221
|
req_sketch<float> sketch(12);
|
|
222
|
-
sketch.update(1);
|
|
222
|
+
sketch.update(1.0f);
|
|
223
223
|
|
|
224
224
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
225
225
|
sketch.serialize(s);
|
|
@@ -235,7 +235,7 @@ TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]"
|
|
|
235
235
|
|
|
236
236
|
TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
|
|
237
237
|
req_sketch<float> sketch(12);
|
|
238
|
-
sketch.update(1);
|
|
238
|
+
sketch.update(1.0f);
|
|
239
239
|
|
|
240
240
|
auto bytes = sketch.serialize();
|
|
241
241
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
|
@@ -253,7 +253,7 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
|
|
|
253
253
|
TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
|
|
254
254
|
req_sketch<float> sketch(12);
|
|
255
255
|
const size_t n = 50;
|
|
256
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
256
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
257
257
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
258
258
|
|
|
259
259
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
@@ -271,7 +271,7 @@ TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]")
|
|
|
271
271
|
TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
|
|
272
272
|
req_sketch<float> sketch(12);
|
|
273
273
|
const size_t n = 50;
|
|
274
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
274
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
275
275
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
276
276
|
|
|
277
277
|
auto bytes = sketch.serialize();
|
|
@@ -290,7 +290,7 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
|
|
|
290
290
|
TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
|
|
291
291
|
req_sketch<float> sketch(12);
|
|
292
292
|
const size_t n = 100000;
|
|
293
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
293
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
294
294
|
REQUIRE(sketch.is_estimation_mode());
|
|
295
295
|
|
|
296
296
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
@@ -308,7 +308,7 @@ TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sket
|
|
|
308
308
|
TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
|
|
309
309
|
req_sketch<float> sketch(12);
|
|
310
310
|
const size_t n = 100000;
|
|
311
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
311
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
312
312
|
REQUIRE(sketch.is_estimation_mode());
|
|
313
313
|
|
|
314
314
|
auto bytes = sketch.serialize();
|
|
@@ -326,7 +326,7 @@ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch
|
|
|
326
326
|
TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
|
|
327
327
|
req_sketch<float> sketch(12);
|
|
328
328
|
const size_t n = 100000;
|
|
329
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
329
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
330
330
|
REQUIRE(sketch.is_estimation_mode());
|
|
331
331
|
|
|
332
332
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
@@ -373,8 +373,8 @@ TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch
|
|
|
373
373
|
REQUIRE(sketch.get_num_retained() == 1);
|
|
374
374
|
REQUIRE(sketch.get_min_value() == 1);
|
|
375
375
|
REQUIRE(sketch.get_max_value() == 1);
|
|
376
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
|
377
|
-
REQUIRE(sketch.get_rank<true>(1) == 1);
|
|
376
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
377
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 1);
|
|
378
378
|
}
|
|
379
379
|
|
|
380
380
|
TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
|
|
@@ -388,7 +388,7 @@ TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]"
|
|
|
388
388
|
REQUIRE(sketch.get_num_retained() == 4);
|
|
389
389
|
REQUIRE(sketch.get_min_value() == 0);
|
|
390
390
|
REQUIRE(sketch.get_max_value() == 3);
|
|
391
|
-
REQUIRE(sketch.get_rank(2) == 0.5);
|
|
391
|
+
REQUIRE(sketch.get_rank(2.0f) == 0.5);
|
|
392
392
|
}
|
|
393
393
|
|
|
394
394
|
TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
|
|
@@ -402,7 +402,7 @@ TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]
|
|
|
402
402
|
REQUIRE(sketch.get_num_retained() == 100);
|
|
403
403
|
REQUIRE(sketch.get_min_value() == 0);
|
|
404
404
|
REQUIRE(sketch.get_max_value() == 99);
|
|
405
|
-
REQUIRE(sketch.get_rank(50) == 0.5);
|
|
405
|
+
REQUIRE(sketch.get_rank(50.0f) == 0.5);
|
|
406
406
|
}
|
|
407
407
|
|
|
408
408
|
TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
|
|
@@ -416,14 +416,14 @@ TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sk
|
|
|
416
416
|
REQUIRE(sketch.get_num_retained() == 2942);
|
|
417
417
|
REQUIRE(sketch.get_min_value() == 0);
|
|
418
418
|
REQUIRE(sketch.get_max_value() == 9999);
|
|
419
|
-
REQUIRE(sketch.get_rank(5000) == 0.5);
|
|
419
|
+
REQUIRE(sketch.get_rank(5000.0f) == 0.5);
|
|
420
420
|
}
|
|
421
421
|
|
|
422
422
|
TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
|
|
423
423
|
req_sketch<float> sketch1(40);
|
|
424
424
|
|
|
425
425
|
req_sketch<float> sketch2(40);
|
|
426
|
-
for (size_t i = 0; i < 1000; ++i) sketch2.update(i);
|
|
426
|
+
for (size_t i = 0; i < 1000; ++i) sketch2.update(static_cast<float>(i));
|
|
427
427
|
|
|
428
428
|
sketch1.merge(sketch2);
|
|
429
429
|
REQUIRE(sketch1.get_min_value() == 0);
|
|
@@ -431,15 +431,15 @@ TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
|
|
|
431
431
|
REQUIRE(sketch1.get_quantile(0.25) == Approx(250).margin(3));
|
|
432
432
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(500).margin(3));
|
|
433
433
|
REQUIRE(sketch1.get_quantile(0.75) == Approx(750).margin(3));
|
|
434
|
-
REQUIRE(sketch1.get_rank(500) == Approx(0.5).margin(0.01));
|
|
434
|
+
REQUIRE(sketch1.get_rank(500.0f) == Approx(0.5).margin(0.01));
|
|
435
435
|
}
|
|
436
436
|
|
|
437
437
|
TEST_CASE("req sketch: merge", "[req_sketch]") {
|
|
438
438
|
req_sketch<float> sketch1(100);
|
|
439
|
-
for (size_t i = 0; i < 1000; ++i) sketch1.update(i);
|
|
439
|
+
for (size_t i = 0; i < 1000; ++i) sketch1.update(static_cast<float>(i));
|
|
440
440
|
|
|
441
441
|
req_sketch<float> sketch2(100);
|
|
442
|
-
for (size_t i = 1000; i < 2000; ++i) sketch2.update(i);
|
|
442
|
+
for (size_t i = 1000; i < 2000; ++i) sketch2.update(static_cast<float>(i));
|
|
443
443
|
|
|
444
444
|
sketch1.merge(sketch2);
|
|
445
445
|
REQUIRE(sketch1.get_min_value() == 0);
|
|
@@ -447,18 +447,18 @@ TEST_CASE("req sketch: merge", "[req_sketch]") {
|
|
|
447
447
|
REQUIRE(sketch1.get_quantile(0.25) == Approx(500).margin(3));
|
|
448
448
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).margin(1));
|
|
449
449
|
REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).margin(1));
|
|
450
|
-
REQUIRE(sketch1.get_rank(1000) == Approx(0.5).margin(0.01));
|
|
450
|
+
REQUIRE(sketch1.get_rank(1000.0f) == Approx(0.5).margin(0.01));
|
|
451
451
|
}
|
|
452
452
|
|
|
453
453
|
TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
|
|
454
454
|
req_sketch<float> sketch1(12);
|
|
455
|
-
for (size_t i = 0; i < 40; ++i) sketch1.update(i);
|
|
455
|
+
for (size_t i = 0; i < 40; ++i) sketch1.update(static_cast<float>(i));
|
|
456
456
|
|
|
457
457
|
req_sketch<float> sketch2(12);
|
|
458
|
-
for (size_t i = 40; i < 80; ++i) sketch2.update(i);
|
|
458
|
+
for (size_t i = 40; i < 80; ++i) sketch2.update(static_cast<float>(i));
|
|
459
459
|
|
|
460
460
|
req_sketch<float> sketch3(12);
|
|
461
|
-
for (size_t i = 80; i < 120; ++i) sketch3.update(i);
|
|
461
|
+
for (size_t i = 80; i < 120; ++i) sketch3.update(static_cast<float>(i));
|
|
462
462
|
|
|
463
463
|
req_sketch<float> sketch(12);
|
|
464
464
|
sketch.merge(sketch1);
|
|
@@ -467,15 +467,15 @@ TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
|
|
|
467
467
|
REQUIRE(sketch.get_min_value() == 0);
|
|
468
468
|
REQUIRE(sketch.get_max_value() == 119);
|
|
469
469
|
REQUIRE(sketch.get_quantile(0.5) == Approx(60).margin(3));
|
|
470
|
-
REQUIRE(sketch.get_rank(60) == Approx(0.5).margin(0.01));
|
|
470
|
+
REQUIRE(sketch.get_rank(60.0f) == Approx(0.5).margin(0.01));
|
|
471
471
|
}
|
|
472
472
|
|
|
473
473
|
TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
|
|
474
474
|
req_sketch<float> sketch1(12);
|
|
475
|
-
sketch1.update(1);
|
|
475
|
+
sketch1.update(1.0f);
|
|
476
476
|
|
|
477
477
|
req_sketch<float> sketch2(12, false);
|
|
478
|
-
sketch2.update(1);
|
|
478
|
+
sketch2.update(1.0f);
|
|
479
479
|
|
|
480
480
|
REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
|
|
481
481
|
}
|
|
@@ -32,17 +32,13 @@ target_include_directories(sampling
|
|
|
32
32
|
target_link_libraries(sampling INTERFACE common)
|
|
33
33
|
target_compile_features(sampling INTERFACE cxx_std_11)
|
|
34
34
|
|
|
35
|
-
set(sampling_HEADERS "include/var_opt_sketch.hpp;include/var_opt_sketch_impl.hpp")
|
|
36
|
-
|
|
37
35
|
install(TARGETS sampling
|
|
38
36
|
EXPORT ${PROJECT_NAME}
|
|
39
37
|
)
|
|
40
38
|
|
|
41
|
-
install(FILES
|
|
39
|
+
install(FILES
|
|
40
|
+
include/var_opt_sketch.hpp
|
|
41
|
+
include/var_opt_sketch_impl.hpp
|
|
42
|
+
include/var_opt_union.hpp
|
|
43
|
+
include/var_opt_union_impl.hpp
|
|
42
44
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
|
43
|
-
|
|
44
|
-
target_sources(sampling
|
|
45
|
-
INTERFACE
|
|
46
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/var_opt_sketch.hpp
|
|
47
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/var_opt_sketch_impl.hpp
|
|
48
|
-
)
|
|
@@ -51,18 +51,23 @@ struct subset_summary {
|
|
|
51
51
|
double total_sketch_weight;
|
|
52
52
|
};
|
|
53
53
|
|
|
54
|
-
enum resize_factor { X1 = 0, X2, X4, X8 };
|
|
55
|
-
|
|
56
54
|
template <typename T, typename S, typename A> class var_opt_union; // forward declaration
|
|
57
55
|
|
|
56
|
+
namespace var_opt_constants {
|
|
57
|
+
const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
|
|
58
|
+
const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
|
|
59
|
+
}
|
|
60
|
+
|
|
58
61
|
template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
|
|
59
62
|
class var_opt_sketch {
|
|
60
63
|
|
|
61
64
|
public:
|
|
62
|
-
static const resize_factor DEFAULT_RESIZE_FACTOR =
|
|
63
|
-
static const uint32_t MAX_K =
|
|
65
|
+
static const resize_factor DEFAULT_RESIZE_FACTOR = var_opt_constants::DEFAULT_RESIZE_FACTOR;
|
|
66
|
+
static const uint32_t MAX_K = var_opt_constants::MAX_K;
|
|
64
67
|
|
|
65
|
-
explicit var_opt_sketch(uint32_t k,
|
|
68
|
+
explicit var_opt_sketch(uint32_t k,
|
|
69
|
+
resize_factor rf = var_opt_constants::DEFAULT_RESIZE_FACTOR,
|
|
70
|
+
const A& allocator = A());
|
|
66
71
|
var_opt_sketch(const var_opt_sketch& other);
|
|
67
72
|
var_opt_sketch(var_opt_sketch&& other) noexcept;
|
|
68
73
|
|
|
@@ -128,7 +128,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
|
|
|
128
128
|
r_(r_count),
|
|
129
129
|
n_(n),
|
|
130
130
|
total_wt_r_(total_wt_r),
|
|
131
|
-
rf_(DEFAULT_RESIZE_FACTOR),
|
|
131
|
+
rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
|
|
132
132
|
curr_items_alloc_(len),
|
|
133
133
|
filled_data_(n > k),
|
|
134
134
|
allocator_(allocator),
|
|
@@ -334,7 +334,7 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
|
|
|
334
334
|
num_bytes += (h_ / 8) + (h_ % 8 > 0);
|
|
335
335
|
}
|
|
336
336
|
// must iterate over the items
|
|
337
|
-
for (auto
|
|
337
|
+
for (auto it: *this)
|
|
338
338
|
num_bytes += S().size_of_item(it.first);
|
|
339
339
|
return num_bytes;
|
|
340
340
|
}
|
|
@@ -359,21 +359,21 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
|
|
|
359
359
|
// first prelong
|
|
360
360
|
uint8_t ser_ver(SER_VER);
|
|
361
361
|
uint8_t family(FAMILY_ID);
|
|
362
|
-
ptr += copy_to_mem(
|
|
363
|
-
ptr += copy_to_mem(
|
|
364
|
-
ptr += copy_to_mem(
|
|
365
|
-
ptr += copy_to_mem(
|
|
366
|
-
ptr += copy_to_mem(
|
|
362
|
+
ptr += copy_to_mem(first_byte, ptr);
|
|
363
|
+
ptr += copy_to_mem(ser_ver, ptr);
|
|
364
|
+
ptr += copy_to_mem(family, ptr);
|
|
365
|
+
ptr += copy_to_mem(flags, ptr);
|
|
366
|
+
ptr += copy_to_mem(k_, ptr);
|
|
367
367
|
|
|
368
368
|
if (!empty) {
|
|
369
369
|
// second and third prelongs
|
|
370
|
-
ptr += copy_to_mem(
|
|
371
|
-
ptr += copy_to_mem(
|
|
372
|
-
ptr += copy_to_mem(
|
|
370
|
+
ptr += copy_to_mem(n_, ptr);
|
|
371
|
+
ptr += copy_to_mem(h_, ptr);
|
|
372
|
+
ptr += copy_to_mem(r_, ptr);
|
|
373
373
|
|
|
374
374
|
// fourth prelong, if needed
|
|
375
375
|
if (r_ > 0) {
|
|
376
|
-
ptr += copy_to_mem(
|
|
376
|
+
ptr += copy_to_mem(total_wt_r_, ptr);
|
|
377
377
|
}
|
|
378
378
|
|
|
379
379
|
// first h_ weights
|
|
@@ -388,14 +388,14 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
|
|
|
388
388
|
}
|
|
389
389
|
|
|
390
390
|
if ((i & 0x7) == 0x7) {
|
|
391
|
-
ptr += copy_to_mem(
|
|
391
|
+
ptr += copy_to_mem(val, ptr);
|
|
392
392
|
val = 0;
|
|
393
393
|
}
|
|
394
394
|
}
|
|
395
395
|
|
|
396
396
|
// write out any remaining values
|
|
397
397
|
if ((h_ & 0x7) > 0) {
|
|
398
|
-
ptr += copy_to_mem(
|
|
398
|
+
ptr += copy_to_mem(val, ptr);
|
|
399
399
|
}
|
|
400
400
|
}
|
|
401
401
|
|
|
@@ -428,25 +428,25 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
|
|
|
428
428
|
// first prelong
|
|
429
429
|
const uint8_t ser_ver(SER_VER);
|
|
430
430
|
const uint8_t family(FAMILY_ID);
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
431
|
+
write(os, first_byte);
|
|
432
|
+
write(os, ser_ver);
|
|
433
|
+
write(os, family);
|
|
434
|
+
write(os, flags);
|
|
435
|
+
write(os, k_);
|
|
436
436
|
|
|
437
437
|
if (!empty) {
|
|
438
438
|
// second and third prelongs
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
439
|
+
write(os, n_);
|
|
440
|
+
write(os, h_);
|
|
441
|
+
write(os, r_);
|
|
442
442
|
|
|
443
443
|
// fourth prelong, if needed
|
|
444
444
|
if (r_ > 0) {
|
|
445
|
-
|
|
445
|
+
write(os, total_wt_r_);
|
|
446
446
|
}
|
|
447
447
|
|
|
448
448
|
// write the first h_ weights
|
|
449
|
-
|
|
449
|
+
write(os, weights_, h_ * sizeof(double));
|
|
450
450
|
|
|
451
451
|
// write the first h_ marks as packed bytes iff we have a gadget
|
|
452
452
|
if (marks_ != nullptr) {
|
|
@@ -457,14 +457,14 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
|
|
|
457
457
|
}
|
|
458
458
|
|
|
459
459
|
if ((i & 0x7) == 0x7) {
|
|
460
|
-
|
|
460
|
+
write(os, val);
|
|
461
461
|
val = 0;
|
|
462
462
|
}
|
|
463
463
|
}
|
|
464
464
|
|
|
465
465
|
// write out any remaining values
|
|
466
466
|
if ((h_ & 0x7) > 0) {
|
|
467
|
-
|
|
467
|
+
write(os, val);
|
|
468
468
|
}
|
|
469
469
|
}
|
|
470
470
|
|
|
@@ -481,17 +481,17 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
|
481
481
|
const char* base = ptr;
|
|
482
482
|
const char* end_ptr = ptr + size;
|
|
483
483
|
uint8_t first_byte;
|
|
484
|
-
ptr += copy_from_mem(ptr,
|
|
484
|
+
ptr += copy_from_mem(ptr, first_byte);
|
|
485
485
|
uint8_t preamble_longs = first_byte & 0x3f;
|
|
486
486
|
resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
|
487
487
|
uint8_t serial_version;
|
|
488
|
-
ptr += copy_from_mem(ptr,
|
|
488
|
+
ptr += copy_from_mem(ptr, serial_version);
|
|
489
489
|
uint8_t family_id;
|
|
490
|
-
ptr += copy_from_mem(ptr,
|
|
490
|
+
ptr += copy_from_mem(ptr, family_id);
|
|
491
491
|
uint8_t flags;
|
|
492
|
-
ptr += copy_from_mem(ptr,
|
|
492
|
+
ptr += copy_from_mem(ptr, flags);
|
|
493
493
|
uint32_t k;
|
|
494
|
-
ptr += copy_from_mem(ptr,
|
|
494
|
+
ptr += copy_from_mem(ptr, k);
|
|
495
495
|
|
|
496
496
|
check_preamble_longs(preamble_longs, flags);
|
|
497
497
|
check_family_and_serialization_version(family_id, serial_version);
|
|
@@ -507,16 +507,16 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
|
507
507
|
// second and third prelongs
|
|
508
508
|
uint64_t n;
|
|
509
509
|
uint32_t h, r;
|
|
510
|
-
ptr += copy_from_mem(ptr,
|
|
511
|
-
ptr += copy_from_mem(ptr,
|
|
512
|
-
ptr += copy_from_mem(ptr,
|
|
510
|
+
ptr += copy_from_mem(ptr, n);
|
|
511
|
+
ptr += copy_from_mem(ptr, h);
|
|
512
|
+
ptr += copy_from_mem(ptr, r);
|
|
513
513
|
|
|
514
514
|
const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
|
|
515
515
|
|
|
516
516
|
// current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
|
|
517
517
|
double total_wt_r = 0.0;
|
|
518
518
|
if (preamble_longs == PREAMBLE_LONGS_FULL) {
|
|
519
|
-
ptr += copy_from_mem(ptr,
|
|
519
|
+
ptr += copy_from_mem(ptr, total_wt_r);
|
|
520
520
|
if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
|
|
521
521
|
throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
|
|
522
522
|
"Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
|
|
@@ -548,7 +548,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
|
548
548
|
check_memory_size(ptr - base + size_marks, size);
|
|
549
549
|
for (uint32_t i = 0; i < h; ++i) {
|
|
550
550
|
if ((i & 0x7) == 0x0) { // should trigger on first iteration
|
|
551
|
-
ptr += copy_from_mem(ptr,
|
|
551
|
+
ptr += copy_from_mem(ptr, val);
|
|
552
552
|
}
|
|
553
553
|
marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
|
|
554
554
|
num_marks_in_h += (marks.get()[i] ? 1 : 0);
|
|
@@ -571,18 +571,13 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
|
571
571
|
|
|
572
572
|
template<typename T, typename S, typename A>
|
|
573
573
|
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
|
|
574
|
-
|
|
575
|
-
is.read((char*)&first_byte, sizeof(first_byte));
|
|
574
|
+
const auto first_byte = read<uint8_t>(is);
|
|
576
575
|
uint8_t preamble_longs = first_byte & 0x3f;
|
|
577
|
-
resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
uint8_t
|
|
581
|
-
|
|
582
|
-
uint8_t flags;
|
|
583
|
-
is.read((char*)&flags, sizeof(flags));
|
|
584
|
-
uint32_t k;
|
|
585
|
-
is.read((char*)&k, sizeof(k));
|
|
576
|
+
const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
|
577
|
+
const auto serial_version = read<uint8_t>(is);
|
|
578
|
+
const auto family_id = read<uint8_t>(is);
|
|
579
|
+
const auto flags = read<uint8_t>(is);
|
|
580
|
+
const auto k = read<uint32_t>(is);
|
|
586
581
|
|
|
587
582
|
check_preamble_longs(preamble_longs, flags);
|
|
588
583
|
check_family_and_serialization_version(family_id, serial_version);
|
|
@@ -598,31 +593,27 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
|
|
|
598
593
|
}
|
|
599
594
|
|
|
600
595
|
// second and third prelongs
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
is.read((char*)&h, sizeof(h));
|
|
605
|
-
is.read((char*)&r, sizeof(r));
|
|
596
|
+
const auto n = read<uint64_t>(is);
|
|
597
|
+
const auto h = read<uint32_t>(is);
|
|
598
|
+
const auto r = read<uint32_t>(is);
|
|
606
599
|
|
|
607
600
|
const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
|
|
608
601
|
|
|
609
602
|
// current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
|
|
610
603
|
double total_wt_r = 0.0;
|
|
611
604
|
if (preamble_longs == PREAMBLE_LONGS_FULL) {
|
|
612
|
-
|
|
605
|
+
total_wt_r = read<double>(is);
|
|
613
606
|
if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
|
|
614
607
|
throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
|
|
615
608
|
"Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
|
|
616
609
|
}
|
|
617
|
-
} else {
|
|
618
|
-
total_wt_r = 0.0;
|
|
619
610
|
}
|
|
620
611
|
|
|
621
612
|
// read the first h weights, fill remainder with -1.0
|
|
622
613
|
std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
|
|
623
614
|
weights_deleter(array_size, allocator));
|
|
624
615
|
double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
|
|
625
|
-
|
|
616
|
+
read(is, wts, h * sizeof(double));
|
|
626
617
|
for (size_t i = 0; i < h; ++i) {
|
|
627
618
|
if (!(wts[i] > 0.0)) {
|
|
628
619
|
throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
|
|
@@ -638,7 +629,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
|
|
|
638
629
|
uint8_t val = 0;
|
|
639
630
|
for (uint32_t i = 0; i < h; ++i) {
|
|
640
631
|
if ((i & 0x7) == 0x0) { // should trigger on first iteration
|
|
641
|
-
|
|
632
|
+
val = read<uint8_t>(is);
|
|
642
633
|
}
|
|
643
634
|
marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
|
|
644
635
|
num_marks_in_h += (marks.get()[i] ? 1 : 0);
|
|
@@ -740,8 +731,10 @@ void var_opt_sketch<T,S,A>::update(T&& item, double weight) {
|
|
|
740
731
|
|
|
741
732
|
template<typename T, typename S, typename A>
|
|
742
733
|
string<A> var_opt_sketch<T,S,A>::to_string() const {
|
|
743
|
-
|
|
744
|
-
|
|
734
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
735
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
736
|
+
std::ostringstream os;
|
|
737
|
+
os << "### VarOpt SUMMARY:" << std::endl;
|
|
745
738
|
os << " k : " << k_ << std::endl;
|
|
746
739
|
os << " h : " << h_ << std::endl;
|
|
747
740
|
os << " r : " << r_ << std::endl;
|
|
@@ -749,24 +742,28 @@ string<A> var_opt_sketch<T,S,A>::to_string() const {
|
|
|
749
742
|
os << " Current size : " << curr_items_alloc_ << std::endl;
|
|
750
743
|
os << " Resize factor: " << (1 << rf_) << std::endl;
|
|
751
744
|
os << "### END SKETCH SUMMARY" << std::endl;
|
|
752
|
-
return os.str();
|
|
745
|
+
return string<A>(os.str().c_str(), allocator_);
|
|
753
746
|
}
|
|
754
747
|
|
|
755
748
|
template<typename T, typename S, typename A>
|
|
756
749
|
string<A> var_opt_sketch<T,S,A>::items_to_string() const {
|
|
757
|
-
|
|
750
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
751
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
752
|
+
std::ostringstream os;
|
|
758
753
|
os << "### Sketch Items" << std::endl;
|
|
759
754
|
int idx = 0;
|
|
760
755
|
for (auto record : *this) {
|
|
761
756
|
os << idx << ": " << record.first << "\twt = " << record.second << std::endl;
|
|
762
757
|
++idx;
|
|
763
758
|
}
|
|
764
|
-
return os.str();
|
|
759
|
+
return string<A>(os.str().c_str(), allocator_);
|
|
765
760
|
}
|
|
766
761
|
|
|
767
762
|
template<typename T, typename S, typename A>
|
|
768
763
|
string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
|
|
769
|
-
|
|
764
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
765
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
766
|
+
std::ostringstream os;
|
|
770
767
|
os << "### Sketch Items" << std::endl;
|
|
771
768
|
const uint32_t array_length = (n_ < k_ ? n_ : k_ + 1);
|
|
772
769
|
for (uint32_t i = 0, display_idx = 0; i < array_length; ++i) {
|
|
@@ -783,7 +780,7 @@ string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
|
|
|
783
780
|
++display_idx;
|
|
784
781
|
}
|
|
785
782
|
}
|
|
786
|
-
return os.str();
|
|
783
|
+
return string<A>(os.str().c_str(), allocator_);
|
|
787
784
|
}
|
|
788
785
|
|
|
789
786
|
template<typename T, typename S, typename A>
|
|
@@ -1420,7 +1417,7 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
|
|
|
1420
1417
|
if (effective_sampling_rate < 0.0 || effective_sampling_rate > 1.0)
|
|
1421
1418
|
throw std::logic_error("invalid sampling rate outside [0.0, 1.0]");
|
|
1422
1419
|
|
|
1423
|
-
|
|
1420
|
+
uint32_t r_true_count = 0;
|
|
1424
1421
|
++idx; // skip the gap
|
|
1425
1422
|
for (; idx < (k_ + 1); ++idx) {
|
|
1426
1423
|
if (predicate(data_[idx])) {
|