datasketches 0.2.0 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +7 -7
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
- data/vendor/datasketches-cpp/python/README.md +50 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
- metadata +18 -7
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -55,15 +55,15 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
|
|
55
55
|
|
56
56
|
TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
|
57
57
|
req_sketch<float> sketch(12, false);
|
58
|
-
sketch.update(1);
|
58
|
+
sketch.update(1.0f);
|
59
59
|
REQUIRE_FALSE(sketch.is_HRA());
|
60
60
|
REQUIRE_FALSE(sketch.is_empty());
|
61
61
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
62
62
|
REQUIRE(sketch.get_n() == 1);
|
63
63
|
REQUIRE(sketch.get_num_retained() == 1);
|
64
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
65
|
-
REQUIRE(sketch.get_rank<true>(1) == 1);
|
66
|
-
REQUIRE(sketch.get_rank(1.1) == 1);
|
64
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
65
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 1);
|
66
|
+
REQUIRE(sketch.get_rank(1.1f) == 1);
|
67
67
|
REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
|
68
68
|
REQUIRE(sketch.get_quantile(0) == 1);
|
69
69
|
REQUIRE(sketch.get_quantile(0.5) == 1);
|
@@ -86,43 +86,43 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
|
|
86
86
|
|
87
87
|
TEST_CASE("req sketch: repeated values", "[req_sketch]") {
|
88
88
|
req_sketch<float> sketch(12);
|
89
|
-
sketch.update(1);
|
90
|
-
sketch.update(1);
|
91
|
-
sketch.update(1);
|
92
|
-
sketch.update(2);
|
93
|
-
sketch.update(2);
|
94
|
-
sketch.update(2);
|
89
|
+
sketch.update(1.0f);
|
90
|
+
sketch.update(1.0f);
|
91
|
+
sketch.update(1.0f);
|
92
|
+
sketch.update(2.0f);
|
93
|
+
sketch.update(2.0f);
|
94
|
+
sketch.update(2.0f);
|
95
95
|
REQUIRE_FALSE(sketch.is_empty());
|
96
96
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
97
97
|
REQUIRE(sketch.get_n() == 6);
|
98
98
|
REQUIRE(sketch.get_num_retained() == 6);
|
99
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
100
|
-
REQUIRE(sketch.get_rank<true>(1) == 0.5);
|
101
|
-
REQUIRE(sketch.get_rank(2) == 0.5);
|
102
|
-
REQUIRE(sketch.get_rank<true>(2) == 1);
|
99
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
100
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 0.5);
|
101
|
+
REQUIRE(sketch.get_rank(2.0f) == 0.5);
|
102
|
+
REQUIRE(sketch.get_rank<true>(2.0f) == 1);
|
103
103
|
}
|
104
104
|
|
105
105
|
TEST_CASE("req sketch: exact mode", "[req_sketch]") {
|
106
106
|
req_sketch<float> sketch(12);
|
107
|
-
for (size_t i = 1; i <= 10; ++i) sketch.update(i);
|
107
|
+
for (size_t i = 1; i <= 10; ++i) sketch.update(static_cast<float>(i));
|
108
108
|
REQUIRE_FALSE(sketch.is_empty());
|
109
109
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
110
110
|
REQUIRE(sketch.get_n() == 10);
|
111
111
|
REQUIRE(sketch.get_num_retained() == 10);
|
112
112
|
|
113
113
|
// like KLL
|
114
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
115
|
-
REQUIRE(sketch.get_rank(2) == 0.1);
|
116
|
-
REQUIRE(sketch.get_rank(6) == 0.5);
|
117
|
-
REQUIRE(sketch.get_rank(9) == 0.8);
|
118
|
-
REQUIRE(sketch.get_rank(10) == 0.9);
|
114
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
115
|
+
REQUIRE(sketch.get_rank(2.0f) == 0.1);
|
116
|
+
REQUIRE(sketch.get_rank(6.0f) == 0.5);
|
117
|
+
REQUIRE(sketch.get_rank(9.0f) == 0.8);
|
118
|
+
REQUIRE(sketch.get_rank(10.0f) == 0.9);
|
119
119
|
|
120
120
|
// inclusive
|
121
|
-
REQUIRE(sketch.get_rank<true>(1) == 0.1);
|
122
|
-
REQUIRE(sketch.get_rank<true>(2) == 0.2);
|
123
|
-
REQUIRE(sketch.get_rank<true>(5) == 0.5);
|
124
|
-
REQUIRE(sketch.get_rank<true>(9) == 0.9);
|
125
|
-
REQUIRE(sketch.get_rank<true>(10) == 1);
|
121
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 0.1);
|
122
|
+
REQUIRE(sketch.get_rank<true>(2.0f) == 0.2);
|
123
|
+
REQUIRE(sketch.get_rank<true>(5.0f) == 0.5);
|
124
|
+
REQUIRE(sketch.get_rank<true>(9.0f) == 0.9);
|
125
|
+
REQUIRE(sketch.get_rank<true>(10.0f) == 1);
|
126
126
|
|
127
127
|
// like KLL
|
128
128
|
REQUIRE(sketch.get_quantile(0) == 1);
|
@@ -164,16 +164,16 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
|
|
164
164
|
TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
|
165
165
|
req_sketch<float> sketch(12);
|
166
166
|
const size_t n = 100000;
|
167
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
167
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
168
168
|
REQUIRE_FALSE(sketch.is_empty());
|
169
169
|
REQUIRE(sketch.is_estimation_mode());
|
170
170
|
REQUIRE(sketch.get_n() == n);
|
171
171
|
// std::cout << sketch.to_string(true);
|
172
172
|
REQUIRE(sketch.get_num_retained() < n);
|
173
173
|
REQUIRE(sketch.get_rank(0) == 0);
|
174
|
-
REQUIRE(sketch.get_rank(n) == 1);
|
175
|
-
REQUIRE(sketch.get_rank(n / 2) == Approx(0.5).margin(0.01));
|
176
|
-
REQUIRE(sketch.get_rank(n - 1) == Approx(1).margin(0.01));
|
174
|
+
REQUIRE(sketch.get_rank(static_cast<float>(n)) == 1);
|
175
|
+
REQUIRE(sketch.get_rank(n / 2.0f) == Approx(0.5).margin(0.01));
|
176
|
+
REQUIRE(sketch.get_rank(n - 1.0f) == Approx(1).margin(0.01));
|
177
177
|
REQUIRE(sketch.get_min_value() == 0);
|
178
178
|
REQUIRE(sketch.get_max_value() == n - 1);
|
179
179
|
REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
|
@@ -219,7 +219,7 @@ TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
|
|
219
219
|
|
220
220
|
TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
|
221
221
|
req_sketch<float> sketch(12);
|
222
|
-
sketch.update(1);
|
222
|
+
sketch.update(1.0f);
|
223
223
|
|
224
224
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
225
225
|
sketch.serialize(s);
|
@@ -235,7 +235,7 @@ TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]"
|
|
235
235
|
|
236
236
|
TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
|
237
237
|
req_sketch<float> sketch(12);
|
238
|
-
sketch.update(1);
|
238
|
+
sketch.update(1.0f);
|
239
239
|
|
240
240
|
auto bytes = sketch.serialize();
|
241
241
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
@@ -253,7 +253,7 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
|
|
253
253
|
TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
|
254
254
|
req_sketch<float> sketch(12);
|
255
255
|
const size_t n = 50;
|
256
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
256
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
257
257
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
258
258
|
|
259
259
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
@@ -271,7 +271,7 @@ TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]")
|
|
271
271
|
TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
|
272
272
|
req_sketch<float> sketch(12);
|
273
273
|
const size_t n = 50;
|
274
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
274
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
275
275
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
276
276
|
|
277
277
|
auto bytes = sketch.serialize();
|
@@ -290,7 +290,7 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
|
|
290
290
|
TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
|
291
291
|
req_sketch<float> sketch(12);
|
292
292
|
const size_t n = 100000;
|
293
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
293
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
294
294
|
REQUIRE(sketch.is_estimation_mode());
|
295
295
|
|
296
296
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
@@ -308,7 +308,7 @@ TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sket
|
|
308
308
|
TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
|
309
309
|
req_sketch<float> sketch(12);
|
310
310
|
const size_t n = 100000;
|
311
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
311
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
312
312
|
REQUIRE(sketch.is_estimation_mode());
|
313
313
|
|
314
314
|
auto bytes = sketch.serialize();
|
@@ -326,7 +326,7 @@ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch
|
|
326
326
|
TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
|
327
327
|
req_sketch<float> sketch(12);
|
328
328
|
const size_t n = 100000;
|
329
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
329
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
330
330
|
REQUIRE(sketch.is_estimation_mode());
|
331
331
|
|
332
332
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
@@ -373,8 +373,8 @@ TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch
|
|
373
373
|
REQUIRE(sketch.get_num_retained() == 1);
|
374
374
|
REQUIRE(sketch.get_min_value() == 1);
|
375
375
|
REQUIRE(sketch.get_max_value() == 1);
|
376
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
377
|
-
REQUIRE(sketch.get_rank<true>(1) == 1);
|
376
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
377
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 1);
|
378
378
|
}
|
379
379
|
|
380
380
|
TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
|
@@ -388,7 +388,7 @@ TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]"
|
|
388
388
|
REQUIRE(sketch.get_num_retained() == 4);
|
389
389
|
REQUIRE(sketch.get_min_value() == 0);
|
390
390
|
REQUIRE(sketch.get_max_value() == 3);
|
391
|
-
REQUIRE(sketch.get_rank(2) == 0.5);
|
391
|
+
REQUIRE(sketch.get_rank(2.0f) == 0.5);
|
392
392
|
}
|
393
393
|
|
394
394
|
TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
|
@@ -402,7 +402,7 @@ TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]
|
|
402
402
|
REQUIRE(sketch.get_num_retained() == 100);
|
403
403
|
REQUIRE(sketch.get_min_value() == 0);
|
404
404
|
REQUIRE(sketch.get_max_value() == 99);
|
405
|
-
REQUIRE(sketch.get_rank(50) == 0.5);
|
405
|
+
REQUIRE(sketch.get_rank(50.0f) == 0.5);
|
406
406
|
}
|
407
407
|
|
408
408
|
TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
|
@@ -416,14 +416,14 @@ TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sk
|
|
416
416
|
REQUIRE(sketch.get_num_retained() == 2942);
|
417
417
|
REQUIRE(sketch.get_min_value() == 0);
|
418
418
|
REQUIRE(sketch.get_max_value() == 9999);
|
419
|
-
REQUIRE(sketch.get_rank(5000) == 0.5);
|
419
|
+
REQUIRE(sketch.get_rank(5000.0f) == 0.5);
|
420
420
|
}
|
421
421
|
|
422
422
|
TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
|
423
423
|
req_sketch<float> sketch1(40);
|
424
424
|
|
425
425
|
req_sketch<float> sketch2(40);
|
426
|
-
for (size_t i = 0; i < 1000; ++i) sketch2.update(i);
|
426
|
+
for (size_t i = 0; i < 1000; ++i) sketch2.update(static_cast<float>(i));
|
427
427
|
|
428
428
|
sketch1.merge(sketch2);
|
429
429
|
REQUIRE(sketch1.get_min_value() == 0);
|
@@ -431,15 +431,15 @@ TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
|
|
431
431
|
REQUIRE(sketch1.get_quantile(0.25) == Approx(250).margin(3));
|
432
432
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(500).margin(3));
|
433
433
|
REQUIRE(sketch1.get_quantile(0.75) == Approx(750).margin(3));
|
434
|
-
REQUIRE(sketch1.get_rank(500) == Approx(0.5).margin(0.01));
|
434
|
+
REQUIRE(sketch1.get_rank(500.0f) == Approx(0.5).margin(0.01));
|
435
435
|
}
|
436
436
|
|
437
437
|
TEST_CASE("req sketch: merge", "[req_sketch]") {
|
438
438
|
req_sketch<float> sketch1(100);
|
439
|
-
for (size_t i = 0; i < 1000; ++i) sketch1.update(i);
|
439
|
+
for (size_t i = 0; i < 1000; ++i) sketch1.update(static_cast<float>(i));
|
440
440
|
|
441
441
|
req_sketch<float> sketch2(100);
|
442
|
-
for (size_t i = 1000; i < 2000; ++i) sketch2.update(i);
|
442
|
+
for (size_t i = 1000; i < 2000; ++i) sketch2.update(static_cast<float>(i));
|
443
443
|
|
444
444
|
sketch1.merge(sketch2);
|
445
445
|
REQUIRE(sketch1.get_min_value() == 0);
|
@@ -447,18 +447,18 @@ TEST_CASE("req sketch: merge", "[req_sketch]") {
|
|
447
447
|
REQUIRE(sketch1.get_quantile(0.25) == Approx(500).margin(3));
|
448
448
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).margin(1));
|
449
449
|
REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).margin(1));
|
450
|
-
REQUIRE(sketch1.get_rank(1000) == Approx(0.5).margin(0.01));
|
450
|
+
REQUIRE(sketch1.get_rank(1000.0f) == Approx(0.5).margin(0.01));
|
451
451
|
}
|
452
452
|
|
453
453
|
TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
|
454
454
|
req_sketch<float> sketch1(12);
|
455
|
-
for (size_t i = 0; i < 40; ++i) sketch1.update(i);
|
455
|
+
for (size_t i = 0; i < 40; ++i) sketch1.update(static_cast<float>(i));
|
456
456
|
|
457
457
|
req_sketch<float> sketch2(12);
|
458
|
-
for (size_t i = 40; i < 80; ++i) sketch2.update(i);
|
458
|
+
for (size_t i = 40; i < 80; ++i) sketch2.update(static_cast<float>(i));
|
459
459
|
|
460
460
|
req_sketch<float> sketch3(12);
|
461
|
-
for (size_t i = 80; i < 120; ++i) sketch3.update(i);
|
461
|
+
for (size_t i = 80; i < 120; ++i) sketch3.update(static_cast<float>(i));
|
462
462
|
|
463
463
|
req_sketch<float> sketch(12);
|
464
464
|
sketch.merge(sketch1);
|
@@ -467,15 +467,15 @@ TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
|
|
467
467
|
REQUIRE(sketch.get_min_value() == 0);
|
468
468
|
REQUIRE(sketch.get_max_value() == 119);
|
469
469
|
REQUIRE(sketch.get_quantile(0.5) == Approx(60).margin(3));
|
470
|
-
REQUIRE(sketch.get_rank(60) == Approx(0.5).margin(0.01));
|
470
|
+
REQUIRE(sketch.get_rank(60.0f) == Approx(0.5).margin(0.01));
|
471
471
|
}
|
472
472
|
|
473
473
|
TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
|
474
474
|
req_sketch<float> sketch1(12);
|
475
|
-
sketch1.update(1);
|
475
|
+
sketch1.update(1.0f);
|
476
476
|
|
477
477
|
req_sketch<float> sketch2(12, false);
|
478
|
-
sketch2.update(1);
|
478
|
+
sketch2.update(1.0f);
|
479
479
|
|
480
480
|
REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
|
481
481
|
}
|
@@ -32,17 +32,13 @@ target_include_directories(sampling
|
|
32
32
|
target_link_libraries(sampling INTERFACE common)
|
33
33
|
target_compile_features(sampling INTERFACE cxx_std_11)
|
34
34
|
|
35
|
-
set(sampling_HEADERS "include/var_opt_sketch.hpp;include/var_opt_sketch_impl.hpp")
|
36
|
-
|
37
35
|
install(TARGETS sampling
|
38
36
|
EXPORT ${PROJECT_NAME}
|
39
37
|
)
|
40
38
|
|
41
|
-
install(FILES
|
39
|
+
install(FILES
|
40
|
+
include/var_opt_sketch.hpp
|
41
|
+
include/var_opt_sketch_impl.hpp
|
42
|
+
include/var_opt_union.hpp
|
43
|
+
include/var_opt_union_impl.hpp
|
42
44
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
43
|
-
|
44
|
-
target_sources(sampling
|
45
|
-
INTERFACE
|
46
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/var_opt_sketch.hpp
|
47
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/var_opt_sketch_impl.hpp
|
48
|
-
)
|
@@ -51,18 +51,23 @@ struct subset_summary {
|
|
51
51
|
double total_sketch_weight;
|
52
52
|
};
|
53
53
|
|
54
|
-
enum resize_factor { X1 = 0, X2, X4, X8 };
|
55
|
-
|
56
54
|
template <typename T, typename S, typename A> class var_opt_union; // forward declaration
|
57
55
|
|
56
|
+
namespace var_opt_constants {
|
57
|
+
const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
|
58
|
+
const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
|
59
|
+
}
|
60
|
+
|
58
61
|
template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
|
59
62
|
class var_opt_sketch {
|
60
63
|
|
61
64
|
public:
|
62
|
-
static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
|
63
|
-
static const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
|
65
|
+
static const resize_factor DEFAULT_RESIZE_FACTOR = var_opt_constants::DEFAULT_RESIZE_FACTOR;
|
66
|
+
static const uint32_t MAX_K = var_opt_constants::MAX_K;
|
64
67
|
|
65
|
-
explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR, const A& allocator = A());
|
68
|
+
explicit var_opt_sketch(uint32_t k,
|
69
|
+
resize_factor rf = var_opt_constants::DEFAULT_RESIZE_FACTOR,
|
70
|
+
const A& allocator = A());
|
66
71
|
var_opt_sketch(const var_opt_sketch& other);
|
67
72
|
var_opt_sketch(var_opt_sketch&& other) noexcept;
|
68
73
|
|
@@ -128,7 +128,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
|
|
128
128
|
r_(r_count),
|
129
129
|
n_(n),
|
130
130
|
total_wt_r_(total_wt_r),
|
131
|
-
rf_(DEFAULT_RESIZE_FACTOR),
|
131
|
+
rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
|
132
132
|
curr_items_alloc_(len),
|
133
133
|
filled_data_(n > k),
|
134
134
|
allocator_(allocator),
|
@@ -334,7 +334,7 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
|
|
334
334
|
num_bytes += (h_ / 8) + (h_ % 8 > 0);
|
335
335
|
}
|
336
336
|
// must iterate over the items
|
337
|
-
for (auto& it: *this)
|
337
|
+
for (auto it: *this)
|
338
338
|
num_bytes += S().size_of_item(it.first);
|
339
339
|
return num_bytes;
|
340
340
|
}
|
@@ -359,21 +359,21 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
|
|
359
359
|
// first prelong
|
360
360
|
uint8_t ser_ver(SER_VER);
|
361
361
|
uint8_t family(FAMILY_ID);
|
362
|
-
ptr += copy_to_mem(
|
363
|
-
ptr += copy_to_mem(
|
364
|
-
ptr += copy_to_mem(
|
365
|
-
ptr += copy_to_mem(
|
366
|
-
ptr += copy_to_mem(
|
362
|
+
ptr += copy_to_mem(first_byte, ptr);
|
363
|
+
ptr += copy_to_mem(ser_ver, ptr);
|
364
|
+
ptr += copy_to_mem(family, ptr);
|
365
|
+
ptr += copy_to_mem(flags, ptr);
|
366
|
+
ptr += copy_to_mem(k_, ptr);
|
367
367
|
|
368
368
|
if (!empty) {
|
369
369
|
// second and third prelongs
|
370
|
-
ptr += copy_to_mem(
|
371
|
-
ptr += copy_to_mem(
|
372
|
-
ptr += copy_to_mem(
|
370
|
+
ptr += copy_to_mem(n_, ptr);
|
371
|
+
ptr += copy_to_mem(h_, ptr);
|
372
|
+
ptr += copy_to_mem(r_, ptr);
|
373
373
|
|
374
374
|
// fourth prelong, if needed
|
375
375
|
if (r_ > 0) {
|
376
|
-
ptr += copy_to_mem(
|
376
|
+
ptr += copy_to_mem(total_wt_r_, ptr);
|
377
377
|
}
|
378
378
|
|
379
379
|
// first h_ weights
|
@@ -388,14 +388,14 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
|
|
388
388
|
}
|
389
389
|
|
390
390
|
if ((i & 0x7) == 0x7) {
|
391
|
-
ptr += copy_to_mem(
|
391
|
+
ptr += copy_to_mem(val, ptr);
|
392
392
|
val = 0;
|
393
393
|
}
|
394
394
|
}
|
395
395
|
|
396
396
|
// write out any remaining values
|
397
397
|
if ((h_ & 0x7) > 0) {
|
398
|
-
ptr += copy_to_mem(
|
398
|
+
ptr += copy_to_mem(val, ptr);
|
399
399
|
}
|
400
400
|
}
|
401
401
|
|
@@ -428,25 +428,25 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
|
|
428
428
|
// first prelong
|
429
429
|
const uint8_t ser_ver(SER_VER);
|
430
430
|
const uint8_t family(FAMILY_ID);
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
431
|
+
write(os, first_byte);
|
432
|
+
write(os, ser_ver);
|
433
|
+
write(os, family);
|
434
|
+
write(os, flags);
|
435
|
+
write(os, k_);
|
436
436
|
|
437
437
|
if (!empty) {
|
438
438
|
// second and third prelongs
|
439
|
-
|
440
|
-
|
441
|
-
|
439
|
+
write(os, n_);
|
440
|
+
write(os, h_);
|
441
|
+
write(os, r_);
|
442
442
|
|
443
443
|
// fourth prelong, if needed
|
444
444
|
if (r_ > 0) {
|
445
|
-
|
445
|
+
write(os, total_wt_r_);
|
446
446
|
}
|
447
447
|
|
448
448
|
// write the first h_ weights
|
449
|
-
|
449
|
+
write(os, weights_, h_ * sizeof(double));
|
450
450
|
|
451
451
|
// write the first h_ marks as packed bytes iff we have a gadget
|
452
452
|
if (marks_ != nullptr) {
|
@@ -457,14 +457,14 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
|
|
457
457
|
}
|
458
458
|
|
459
459
|
if ((i & 0x7) == 0x7) {
|
460
|
-
|
460
|
+
write(os, val);
|
461
461
|
val = 0;
|
462
462
|
}
|
463
463
|
}
|
464
464
|
|
465
465
|
// write out any remaining values
|
466
466
|
if ((h_ & 0x7) > 0) {
|
467
|
-
|
467
|
+
write(os, val);
|
468
468
|
}
|
469
469
|
}
|
470
470
|
|
@@ -481,17 +481,17 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
481
481
|
const char* base = ptr;
|
482
482
|
const char* end_ptr = ptr + size;
|
483
483
|
uint8_t first_byte;
|
484
|
-
ptr += copy_from_mem(ptr,
|
484
|
+
ptr += copy_from_mem(ptr, first_byte);
|
485
485
|
uint8_t preamble_longs = first_byte & 0x3f;
|
486
486
|
resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
487
487
|
uint8_t serial_version;
|
488
|
-
ptr += copy_from_mem(ptr,
|
488
|
+
ptr += copy_from_mem(ptr, serial_version);
|
489
489
|
uint8_t family_id;
|
490
|
-
ptr += copy_from_mem(ptr,
|
490
|
+
ptr += copy_from_mem(ptr, family_id);
|
491
491
|
uint8_t flags;
|
492
|
-
ptr += copy_from_mem(ptr,
|
492
|
+
ptr += copy_from_mem(ptr, flags);
|
493
493
|
uint32_t k;
|
494
|
-
ptr += copy_from_mem(ptr,
|
494
|
+
ptr += copy_from_mem(ptr, k);
|
495
495
|
|
496
496
|
check_preamble_longs(preamble_longs, flags);
|
497
497
|
check_family_and_serialization_version(family_id, serial_version);
|
@@ -507,16 +507,16 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
507
507
|
// second and third prelongs
|
508
508
|
uint64_t n;
|
509
509
|
uint32_t h, r;
|
510
|
-
ptr += copy_from_mem(ptr,
|
511
|
-
ptr += copy_from_mem(ptr,
|
512
|
-
ptr += copy_from_mem(ptr,
|
510
|
+
ptr += copy_from_mem(ptr, n);
|
511
|
+
ptr += copy_from_mem(ptr, h);
|
512
|
+
ptr += copy_from_mem(ptr, r);
|
513
513
|
|
514
514
|
const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
|
515
515
|
|
516
516
|
// current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
|
517
517
|
double total_wt_r = 0.0;
|
518
518
|
if (preamble_longs == PREAMBLE_LONGS_FULL) {
|
519
|
-
ptr += copy_from_mem(ptr,
|
519
|
+
ptr += copy_from_mem(ptr, total_wt_r);
|
520
520
|
if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
|
521
521
|
throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
|
522
522
|
"Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
|
@@ -548,7 +548,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
548
548
|
check_memory_size(ptr - base + size_marks, size);
|
549
549
|
for (uint32_t i = 0; i < h; ++i) {
|
550
550
|
if ((i & 0x7) == 0x0) { // should trigger on first iteration
|
551
|
-
ptr += copy_from_mem(ptr,
|
551
|
+
ptr += copy_from_mem(ptr, val);
|
552
552
|
}
|
553
553
|
marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
|
554
554
|
num_marks_in_h += (marks.get()[i] ? 1 : 0);
|
@@ -571,18 +571,13 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
571
571
|
|
572
572
|
template<typename T, typename S, typename A>
|
573
573
|
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
|
574
|
-
|
575
|
-
is.read((char*)&first_byte, sizeof(first_byte));
|
574
|
+
const auto first_byte = read<uint8_t>(is);
|
576
575
|
uint8_t preamble_longs = first_byte & 0x3f;
|
577
|
-
resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
578
|
-
|
579
|
-
|
580
|
-
uint8_t
|
581
|
-
|
582
|
-
uint8_t flags;
|
583
|
-
is.read((char*)&flags, sizeof(flags));
|
584
|
-
uint32_t k;
|
585
|
-
is.read((char*)&k, sizeof(k));
|
576
|
+
const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
577
|
+
const auto serial_version = read<uint8_t>(is);
|
578
|
+
const auto family_id = read<uint8_t>(is);
|
579
|
+
const auto flags = read<uint8_t>(is);
|
580
|
+
const auto k = read<uint32_t>(is);
|
586
581
|
|
587
582
|
check_preamble_longs(preamble_longs, flags);
|
588
583
|
check_family_and_serialization_version(family_id, serial_version);
|
@@ -598,31 +593,27 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
|
|
598
593
|
}
|
599
594
|
|
600
595
|
// second and third prelongs
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
is.read((char*)&h, sizeof(h));
|
605
|
-
is.read((char*)&r, sizeof(r));
|
596
|
+
const auto n = read<uint64_t>(is);
|
597
|
+
const auto h = read<uint32_t>(is);
|
598
|
+
const auto r = read<uint32_t>(is);
|
606
599
|
|
607
600
|
const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
|
608
601
|
|
609
602
|
// current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
|
610
603
|
double total_wt_r = 0.0;
|
611
604
|
if (preamble_longs == PREAMBLE_LONGS_FULL) {
|
612
|
-
|
605
|
+
total_wt_r = read<double>(is);
|
613
606
|
if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
|
614
607
|
throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
|
615
608
|
"Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
|
616
609
|
}
|
617
|
-
} else {
|
618
|
-
total_wt_r = 0.0;
|
619
610
|
}
|
620
611
|
|
621
612
|
// read the first h weights, fill remainder with -1.0
|
622
613
|
std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
|
623
614
|
weights_deleter(array_size, allocator));
|
624
615
|
double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
|
625
|
-
|
616
|
+
read(is, wts, h * sizeof(double));
|
626
617
|
for (size_t i = 0; i < h; ++i) {
|
627
618
|
if (!(wts[i] > 0.0)) {
|
628
619
|
throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
|
@@ -638,7 +629,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
|
|
638
629
|
uint8_t val = 0;
|
639
630
|
for (uint32_t i = 0; i < h; ++i) {
|
640
631
|
if ((i & 0x7) == 0x0) { // should trigger on first iteration
|
641
|
-
|
632
|
+
val = read<uint8_t>(is);
|
642
633
|
}
|
643
634
|
marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
|
644
635
|
num_marks_in_h += (marks.get()[i] ? 1 : 0);
|
@@ -740,8 +731,10 @@ void var_opt_sketch<T,S,A>::update(T&& item, double weight) {
|
|
740
731
|
|
741
732
|
template<typename T, typename S, typename A>
|
742
733
|
string<A> var_opt_sketch<T,S,A>::to_string() const {
|
743
|
-
|
744
|
-
|
734
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
735
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
736
|
+
std::ostringstream os;
|
737
|
+
os << "### VarOpt SUMMARY:" << std::endl;
|
745
738
|
os << " k : " << k_ << std::endl;
|
746
739
|
os << " h : " << h_ << std::endl;
|
747
740
|
os << " r : " << r_ << std::endl;
|
@@ -749,24 +742,28 @@ string<A> var_opt_sketch<T,S,A>::to_string() const {
|
|
749
742
|
os << " Current size : " << curr_items_alloc_ << std::endl;
|
750
743
|
os << " Resize factor: " << (1 << rf_) << std::endl;
|
751
744
|
os << "### END SKETCH SUMMARY" << std::endl;
|
752
|
-
return os.str();
|
745
|
+
return string<A>(os.str().c_str(), allocator_);
|
753
746
|
}
|
754
747
|
|
755
748
|
template<typename T, typename S, typename A>
|
756
749
|
string<A> var_opt_sketch<T,S,A>::items_to_string() const {
|
757
|
-
|
750
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
751
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
752
|
+
std::ostringstream os;
|
758
753
|
os << "### Sketch Items" << std::endl;
|
759
754
|
int idx = 0;
|
760
755
|
for (auto record : *this) {
|
761
756
|
os << idx << ": " << record.first << "\twt = " << record.second << std::endl;
|
762
757
|
++idx;
|
763
758
|
}
|
764
|
-
return os.str();
|
759
|
+
return string<A>(os.str().c_str(), allocator_);
|
765
760
|
}
|
766
761
|
|
767
762
|
template<typename T, typename S, typename A>
|
768
763
|
string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
|
769
|
-
|
764
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
765
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
766
|
+
std::ostringstream os;
|
770
767
|
os << "### Sketch Items" << std::endl;
|
771
768
|
const uint32_t array_length = (n_ < k_ ? n_ : k_ + 1);
|
772
769
|
for (uint32_t i = 0, display_idx = 0; i < array_length; ++i) {
|
@@ -783,7 +780,7 @@ string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
|
|
783
780
|
++display_idx;
|
784
781
|
}
|
785
782
|
}
|
786
|
-
return os.str();
|
783
|
+
return string<A>(os.str().c_str(), allocator_);
|
787
784
|
}
|
788
785
|
|
789
786
|
template<typename T, typename S, typename A>
|
@@ -1420,7 +1417,7 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
|
|
1420
1417
|
if (effective_sampling_rate < 0.0 || effective_sampling_rate > 1.0)
|
1421
1418
|
throw std::logic_error("invalid sampling rate outside [0.0, 1.0]");
|
1422
1419
|
|
1423
|
-
|
1420
|
+
uint32_t r_true_count = 0;
|
1424
1421
|
++idx; // skip the gap
|
1425
1422
|
for (; idx < (k_ + 1); ++idx) {
|
1426
1423
|
if (predicate(data_[idx])) {
|