datasketches 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
- data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
- data/vendor/datasketches-cpp/python/README.md +6 -3
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
- data/vendor/datasketches-cpp/setup.py +5 -3
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
- metadata +43 -34
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# Executable that hosts the REQ sketch unit tests; its sources are attached below.
add_executable(req_test)

# Translation units that make up the test binary.
target_sources(req_test
  PRIVATE
    req_sketch_test.cpp
    req_sketch_custom_type_test.cpp
)

# Link against the `req` and `common_test` targets.
target_link_libraries(req_test req common_test)

# Compile the tests as C++11 and require that standard.
set_target_properties(req_test PROPERTIES
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED YES
)

# Pass this directory (CMake-style path with a trailing slash) to the tests as
# TEST_BINARY_INPUT_PATH; the test sources use it as a path prefix.
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" REQ_TEST_BINARY_PATH)
set(REQ_TEST_BINARY_PATH "${REQ_TEST_BINARY_PATH}/")
target_compile_definitions(req_test
  PRIVATE
    TEST_BINARY_INPUT_PATH="${REQ_TEST_BINARY_PATH}"
)

# Register the binary with CTest under the name req_test.
add_test(
  NAME req_test
  COMMAND req_test
)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch.hpp>
|
|
21
|
+
#include <sstream>
|
|
22
|
+
|
|
23
|
+
#include <req_sketch.hpp>
|
|
24
|
+
#include <test_allocator.hpp>
|
|
25
|
+
#include <test_type.hpp>
|
|
26
|
+
|
|
27
|
+
namespace datasketches {
|
|
28
|
+
|
|
29
|
+
using req_test_type_sketch = req_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>>;
|
|
30
|
+
using alloc = test_allocator<test_type>;
|
|
31
|
+
|
|
32
|
+
TEST_CASE("req sketch custom type", "[req_sketch]") {
|
|
33
|
+
|
|
34
|
+
// setup section
|
|
35
|
+
test_allocator_total_bytes = 0;
|
|
36
|
+
|
|
37
|
+
SECTION("compact level zero") {
|
|
38
|
+
req_test_type_sketch sketch(4, true, 0);
|
|
39
|
+
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
|
|
40
|
+
REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
|
|
41
|
+
REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
|
|
42
|
+
REQUIRE(sketch.get_serialized_size_bytes() == 8);
|
|
43
|
+
|
|
44
|
+
for (int i = 0; i < 24; ++i) sketch.update(i);
|
|
45
|
+
//std::cout << sketch.to_string(true);
|
|
46
|
+
|
|
47
|
+
REQUIRE(sketch.is_estimation_mode());
|
|
48
|
+
REQUIRE(sketch.get_n() > sketch.get_num_retained());
|
|
49
|
+
REQUIRE(sketch.get_min_value().get_value() == 0);
|
|
50
|
+
REQUIRE(sketch.get_max_value().get_value() == 23);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
SECTION("merge small") {
|
|
54
|
+
req_test_type_sketch sketch1(4, true, 0);
|
|
55
|
+
sketch1.update(1);
|
|
56
|
+
|
|
57
|
+
req_test_type_sketch sketch2(4, true, 0);
|
|
58
|
+
sketch2.update(2);
|
|
59
|
+
|
|
60
|
+
sketch2.merge(sketch1);
|
|
61
|
+
|
|
62
|
+
//std::cout << sketch2.to_string(true);
|
|
63
|
+
|
|
64
|
+
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
65
|
+
REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
|
|
66
|
+
REQUIRE(sketch2.get_min_value().get_value() == 1);
|
|
67
|
+
REQUIRE(sketch2.get_max_value().get_value() == 2);
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
SECTION("merge higher levels") {
|
|
71
|
+
req_test_type_sketch sketch1(4, true, 0);
|
|
72
|
+
for (int i = 0; i < 24; ++i) sketch1.update(i);
|
|
73
|
+
|
|
74
|
+
req_test_type_sketch sketch2(4, true, 0);
|
|
75
|
+
for (int i = 0; i < 24; ++i) sketch2.update(i);
|
|
76
|
+
|
|
77
|
+
sketch2.merge(sketch1);
|
|
78
|
+
|
|
79
|
+
//std::cout << sketch2.to_string(true);
|
|
80
|
+
|
|
81
|
+
REQUIRE(sketch2.is_estimation_mode());
|
|
82
|
+
REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
|
|
83
|
+
REQUIRE(sketch2.get_min_value().get_value() == 0);
|
|
84
|
+
REQUIRE(sketch2.get_max_value().get_value() == 23);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
SECTION("serialize deserialize") {
|
|
88
|
+
req_test_type_sketch sketch1(12, true, 0);
|
|
89
|
+
|
|
90
|
+
const int n = 1000;
|
|
91
|
+
for (int i = 0; i < n; i++) sketch1.update(i);
|
|
92
|
+
|
|
93
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
94
|
+
sketch1.serialize(s);
|
|
95
|
+
REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
|
|
96
|
+
auto sketch2 = req_test_type_sketch::deserialize(s, alloc(0));
|
|
97
|
+
REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
|
|
98
|
+
REQUIRE(s.tellg() == s.tellp());
|
|
99
|
+
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
|
|
100
|
+
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
|
|
101
|
+
REQUIRE(sketch2.get_n() == sketch1.get_n());
|
|
102
|
+
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
|
|
103
|
+
REQUIRE(sketch2.get_min_value().get_value() == sketch1.get_min_value().get_value());
|
|
104
|
+
REQUIRE(sketch2.get_max_value().get_value() == sketch1.get_max_value().get_value());
|
|
105
|
+
REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
|
|
106
|
+
REQUIRE(sketch2.get_rank(0) == sketch1.get_rank(0));
|
|
107
|
+
REQUIRE(sketch2.get_rank(n) == sketch1.get_rank(n));
|
|
108
|
+
REQUIRE(sketch2.get_rank(n / 2) == sketch1.get_rank(n / 2));
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
SECTION("moving merge") {
|
|
112
|
+
req_test_type_sketch sketch1(4, true, 0);
|
|
113
|
+
for (int i = 0; i < 10; i++) sketch1.update(i);
|
|
114
|
+
req_test_type_sketch sketch2(4, true, 0);
|
|
115
|
+
sketch2.update(10);
|
|
116
|
+
sketch2.merge(std::move(sketch1));
|
|
117
|
+
REQUIRE(sketch2.get_min_value().get_value() == 0);
|
|
118
|
+
REQUIRE(sketch2.get_max_value().get_value() == 10);
|
|
119
|
+
REQUIRE(sketch2.get_n() == 11);
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// cleanup
|
|
123
|
+
if (test_allocator_total_bytes != 0) {
|
|
124
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,494 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch.hpp>
|
|
21
|
+
|
|
22
|
+
#include <req_sketch.hpp>
|
|
23
|
+
|
|
24
|
+
#include <fstream>
|
|
25
|
+
#include <sstream>
|
|
26
|
+
#include <limits>
|
|
27
|
+
|
|
28
|
+
namespace datasketches {
|
|
29
|
+
|
|
30
|
+
// Path prefix for binary test inputs. Uses TEST_BINARY_INPUT_PATH when the
// build defines it; otherwise falls back to the relative "test/" directory.
const std::string input_path =
#ifdef TEST_BINARY_INPUT_PATH
    TEST_BINARY_INPUT_PATH;
#else
    "test/";
#endif
|
|
35
|
+
|
|
36
|
+
// A sketch that has received no input: checks its configuration, its
// counters, and that every value-returning query on float yields NaN.
TEST_CASE("req sketch: empty", "[req_sketch]") {
  std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";

  req_sketch<float> sketch(12);

  // configuration and bookkeeping
  REQUIRE(sketch.get_k() == 12);
  REQUIRE(sketch.is_HRA());
  REQUIRE(sketch.is_empty());
  REQUIRE_FALSE(sketch.is_estimation_mode());
  REQUIRE(sketch.get_n() == 0);
  REQUIRE(sketch.get_num_retained() == 0);

  // rank queries
  const float infinity = std::numeric_limits<float>::infinity();
  REQUIRE(std::isnan(sketch.get_rank(0)));
  REQUIRE(std::isnan(sketch.get_rank(infinity)));

  // extremes and single-quantile queries
  REQUIRE(std::isnan(sketch.get_min_value()));
  REQUIRE(std::isnan(sketch.get_max_value()));
  REQUIRE(std::isnan(sketch.get_quantile(0)));
  REQUIRE(std::isnan(sketch.get_quantile(0.5)));
  REQUIRE(std::isnan(sketch.get_quantile(1)));

  // the batch query is expected to return an empty container
  const double fractions[3] = {0, 0.5, 1};
  REQUIRE(sketch.get_quantiles(fractions, 3).size() == 0);
}
|
|
55
|
+
|
|
56
|
+
// One update into a sketch constructed with the second argument false,
// which the test expects to report is_HRA() == false.
TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
  req_sketch<float> sketch(12, false);
  sketch.update(1);

  REQUIRE_FALSE(sketch.is_HRA());
  REQUIRE_FALSE(sketch.is_empty());
  REQUIRE_FALSE(sketch.is_estimation_mode());
  REQUIRE(sketch.get_n() == 1);
  REQUIRE(sketch.get_num_retained() == 1);

  // default rank of the stored value is 0; get_rank<true> gives 1
  REQUIRE(sketch.get_rank(1) == 0);
  REQUIRE(sketch.get_rank<true>(1) == 1);
  REQUIRE(sketch.get_rank(1.1) == 1);
  const float infinity = std::numeric_limits<float>::infinity();
  REQUIRE(sketch.get_rank(infinity) == 1);

  // every quantile is the single stored value
  REQUIRE(sketch.get_quantile(0) == 1);
  REQUIRE(sketch.get_quantile(0.5) == 1);
  REQUIRE(sketch.get_quantile(1) == 1);

  const double fractions[3] = {0, 0.5, 1};
  auto results = sketch.get_quantiles(fractions, 3);
  REQUIRE(results.size() == 3);
  REQUIRE(results[0] == 1);
  REQUIRE(results[1] == 1);
  REQUIRE(results[2] == 1);

  // iteration yields exactly one entry whose .second equals 1
  unsigned num_entries = 0;
  for (const auto& entry : sketch) {
    REQUIRE(entry.second == 1);
    ++num_entries;
  }
  REQUIRE(num_entries == 1);
}
|
|
86
|
+
|
|
87
|
+
// Three copies each of 1 and 2: checks default versus get_rank<true> results
// when the input contains duplicates.
TEST_CASE("req sketch: repeated values", "[req_sketch]") {
  req_sketch<float> sketch(12);

  // 1, 1, 1, 2, 2, 2 -- same order as six individual calls
  for (int repeat = 0; repeat < 3; ++repeat) sketch.update(1);
  for (int repeat = 0; repeat < 3; ++repeat) sketch.update(2);

  REQUIRE_FALSE(sketch.is_empty());
  REQUIRE_FALSE(sketch.is_estimation_mode());
  REQUIRE(sketch.get_n() == 6);
  REQUIRE(sketch.get_num_retained() == 6);

  // value 1
  REQUIRE(sketch.get_rank(1) == 0);
  REQUIRE(sketch.get_rank<true>(1) == 0.5);

  // value 2
  REQUIRE(sketch.get_rank(2) == 0.5);
  REQUIRE(sketch.get_rank<true>(2) == 1);
}
|
|
104
|
+
|
|
105
|
+
// Values 1..10 in a sketch that stays out of estimation mode: ranks,
// quantiles, CDF/PMF and rank bounds are compared against exact figures.
TEST_CASE("req sketch: exact mode", "[req_sketch]") {
  req_sketch<float> sketch(12);
  for (size_t value = 1; value <= 10; ++value) {
    sketch.update(value);
  }

  REQUIRE_FALSE(sketch.is_empty());
  REQUIRE_FALSE(sketch.is_estimation_mode());
  REQUIRE(sketch.get_n() == 10);
  REQUIRE(sketch.get_num_retained() == 10);

  // default ranks (like KLL)
  REQUIRE(sketch.get_rank(1) == 0);
  REQUIRE(sketch.get_rank(2) == 0.1);
  REQUIRE(sketch.get_rank(6) == 0.5);
  REQUIRE(sketch.get_rank(9) == 0.8);
  REQUIRE(sketch.get_rank(10) == 0.9);

  // inclusive ranks
  REQUIRE(sketch.get_rank<true>(1) == 0.1);
  REQUIRE(sketch.get_rank<true>(2) == 0.2);
  REQUIRE(sketch.get_rank<true>(5) == 0.5);
  REQUIRE(sketch.get_rank<true>(9) == 0.9);
  REQUIRE(sketch.get_rank<true>(10) == 1);

  // default quantiles (like KLL)
  REQUIRE(sketch.get_quantile(0) == 1);
  REQUIRE(sketch.get_quantile(0.1) == 2);
  REQUIRE(sketch.get_quantile(0.5) == 6);
  REQUIRE(sketch.get_quantile(0.9) == 10);
  REQUIRE(sketch.get_quantile(1) == 10);

  // inclusive quantiles
  REQUIRE(sketch.get_quantile<true>(0) == 1);
  REQUIRE(sketch.get_quantile<true>(0.1) == 1);
  REQUIRE(sketch.get_quantile<true>(0.5) == 5);
  REQUIRE(sketch.get_quantile<true>(0.9) == 9);
  REQUIRE(sketch.get_quantile<true>(1) == 10);

  // batch quantile query
  const double fractions[3] = {0, 0.5, 1};
  auto results = sketch.get_quantiles(fractions, 3);
  REQUIRE(results.size() == 3);
  REQUIRE(results[0] == 1);
  REQUIRE(results[1] == 6);
  REQUIRE(results[2] == 10);

  // three split points give four CDF and four PMF entries
  const float split_points[3] = {2, 6, 9};
  auto cdf = sketch.get_CDF(split_points, 3);
  REQUIRE(cdf[0] == 0.1);
  REQUIRE(cdf[1] == 0.5);
  REQUIRE(cdf[2] == 0.8);
  REQUIRE(cdf[3] == 1);
  auto pmf = sketch.get_PMF(split_points, 3);
  REQUIRE(pmf[0] == Approx(0.1).margin(1e-8));
  REQUIRE(pmf[1] == Approx(0.4).margin(1e-8));
  REQUIRE(pmf[2] == Approx(0.3).margin(1e-8));
  REQUIRE(pmf[3] == Approx(0.2).margin(1e-8));

  // both bounds are expected to equal the queried rank here
  REQUIRE(sketch.get_rank_lower_bound(0.5, 1) == 0.5);
  REQUIRE(sketch.get_rank_upper_bound(0.5, 1) == 0.5);
}
|
|
163
|
+
|
|
164
|
+
// 100000 sequential values: the sketch must report estimation mode, retain
// fewer items than it saw, and answer rank queries within the margins below.
TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
  req_sketch<float> sketch(12);
  const size_t num_values = 100000;
  for (size_t value = 0; value < num_values; ++value) {
    sketch.update(value);
  }

  REQUIRE_FALSE(sketch.is_empty());
  REQUIRE(sketch.is_estimation_mode());
  REQUIRE(sketch.get_n() == num_values);
  // debugging aid: std::cout << sketch.to_string(true);
  REQUIRE(sketch.get_num_retained() < num_values);

  // ends of the range are checked exactly, interior points approximately
  REQUIRE(sketch.get_rank(0) == 0);
  REQUIRE(sketch.get_rank(num_values) == 1);
  REQUIRE(sketch.get_rank(num_values / 2) == Approx(0.5).margin(0.01));
  REQUIRE(sketch.get_rank(num_values - 1) == Approx(1).margin(0.01));

  REQUIRE(sketch.get_min_value() == 0);
  REQUIRE(sketch.get_max_value() == num_values - 1);

  // the bounds must now lie strictly on either side of the queried rank
  REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
  REQUIRE(sketch.get_rank_upper_bound(0.5, 1) > 0.5);

  // iteration visits every retained item; each entry's .second is at least 1
  unsigned num_entries = 0;
  for (const auto& entry : sketch) {
    REQUIRE(entry.second >= 1);
    ++num_entries;
  }
  REQUIRE(num_entries == sketch.get_num_retained());
}
|
|
189
|
+
|
|
190
|
+
// Round-trips an empty sketch through a binary stringstream.
TEST_CASE("req sketch: stream serialize-deserialize empty", "[req_sketch]") {
  req_sketch<float> original(12);

  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  original.serialize(stream);
  auto restored = req_sketch<float>::deserialize(stream);

  // the read position must have caught up with the write position
  REQUIRE(stream.tellg() == stream.tellp());

  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_n() == original.get_n());

  // the restored empty sketch is expected to report NaN extremes
  REQUIRE(std::isnan(restored.get_min_value()));
  REQUIRE(std::isnan(restored.get_max_value()));
}
|
|
204
|
+
|
|
205
|
+
// Round-trips an empty sketch through the in-memory byte representation.
TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
  req_sketch<float> original(12);

  auto bytes = original.serialize();
  // the buffer length must match the size the sketch reports
  REQUIRE(bytes.size() == original.get_serialized_size_bytes());

  auto restored = req_sketch<float>::deserialize(bytes.data(), bytes.size());
  REQUIRE(bytes.size() == restored.get_serialized_size_bytes());

  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_n() == original.get_n());

  // the restored empty sketch is expected to report NaN extremes
  REQUIRE(std::isnan(restored.get_min_value()));
  REQUIRE(std::isnan(restored.get_max_value()));
}
|
|
219
|
+
|
|
220
|
+
// Round-trips a sketch holding a single value through a binary stringstream.
TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
  req_sketch<float> original(12);
  original.update(1);

  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  original.serialize(stream);
  auto restored = req_sketch<float>::deserialize(stream);

  // the read position must have caught up with the write position
  REQUIRE(stream.tellg() == stream.tellp());

  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
}
|
|
235
|
+
|
|
236
|
+
// Round-trips a sketch holding a single value through the byte representation.
TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
  req_sketch<float> original(12);
  original.update(1);

  auto bytes = original.serialize();
  // the buffer length must match the size the sketch reports
  REQUIRE(bytes.size() == original.get_serialized_size_bytes());

  auto restored = req_sketch<float>::deserialize(bytes.data(), bytes.size());
  // print the restored sketch to stdout
  std::cout << restored.to_string(true);
  REQUIRE(bytes.size() == restored.get_serialized_size_bytes());

  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
}
|
|
252
|
+
|
|
253
|
+
// Round-trips a 50-item sketch through a binary stringstream, after
// confirming it is not in estimation mode.
TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
  req_sketch<float> original(12);
  const size_t num_values = 50;
  for (size_t value = 0; value < num_values; ++value) {
    original.update(value);
  }
  REQUIRE_FALSE(original.is_estimation_mode());

  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  original.serialize(stream);
  auto restored = req_sketch<float>::deserialize(stream);

  // the read position must have caught up with the write position
  REQUIRE(stream.tellg() == stream.tellp());

  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
}
|
|
270
|
+
|
|
271
|
+
// Round-trips a 50-item sketch through the byte representation, after
// confirming it is not in estimation mode.
TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
  req_sketch<float> original(12);
  const size_t num_values = 50;
  for (size_t value = 0; value < num_values; ++value) {
    original.update(value);
  }
  REQUIRE_FALSE(original.is_estimation_mode());

  auto bytes = original.serialize();
  // the buffer length must match the size the sketch reports
  REQUIRE(bytes.size() == original.get_serialized_size_bytes());

  auto restored = req_sketch<float>::deserialize(bytes.data(), bytes.size());
  // print the restored sketch to stdout
  std::cout << restored.to_string(true);
  REQUIRE(bytes.size() == restored.get_serialized_size_bytes());

  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
}
|
|
289
|
+
|
|
290
|
+
// Round-trips a 100000-item sketch (asserted to be in estimation mode)
// through a binary stringstream and compares summary values.
TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
  req_sketch<float> original(12);
  const size_t item_count = 100000;
  for (size_t value = 0; value < item_count; ++value) {
    original.update(value);
  }
  REQUIRE(original.is_estimation_mode());

  const auto mode = std::ios::in | std::ios::out | std::ios::binary;
  std::stringstream buffer(mode);
  original.serialize(buffer);
  auto restored = req_sketch<float>::deserialize(buffer);
  // Everything that was written must have been consumed by deserialize().
  REQUIRE(buffer.tellg() == buffer.tellp());

  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
}
|
|
307
|
+
|
|
308
|
+
// Round-trips a 100000-item sketch (asserted to be in estimation mode)
// through the byte-array serializer and compares serialized size and summary
// values.
TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
  req_sketch<float> original(12);
  const size_t item_count = 100000;
  for (size_t value = 0; value < item_count; ++value) {
    original.update(value);
  }
  REQUIRE(original.is_estimation_mode());

  auto blob = original.serialize();
  REQUIRE(blob.size() == original.get_serialized_size_bytes());

  auto restored = req_sketch<float>::deserialize(blob.data(), blob.size());
  REQUIRE(blob.size() == restored.get_serialized_size_bytes());

  REQUIRE(restored.is_empty() == original.is_empty());
  REQUIRE(restored.is_estimation_mode() == original.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == original.get_num_retained());
  REQUIRE(restored.get_n() == original.get_n());
  REQUIRE(restored.get_min_value() == original.get_min_value());
  REQUIRE(restored.get_max_value() == original.get_max_value());
}
|
|
325
|
+
|
|
326
|
+
// Serializes the same sketch both to a stream and to a byte array, checks the
// two encodings are byte-for-byte identical, then deserializes each encoding
// and checks that the two resulting sketches agree with each other and with
// the source sketch.
TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
  req_sketch<float> sketch(12);
  const size_t n = 100000;
  for (size_t i = 0; i < n; ++i) sketch.update(i);
  REQUIRE(sketch.is_estimation_mode());

  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
  sketch.serialize(s);
  auto bytes = sketch.serialize();
  REQUIRE(bytes.size() == static_cast<size_t>(s.tellp()));

  // Compare the two encodings one byte at a time; reading advances the
  // stream's get position, hence the rewind below.
  const char* raw = reinterpret_cast<const char*>(bytes.data());
  for (size_t i = 0; i < bytes.size(); ++i) {
    REQUIRE(raw[i] == static_cast<char>(s.get()));
  }

  s.seekg(0); // rewind
  auto sketch1 = req_sketch<float>::deserialize(s);
  auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
  REQUIRE(bytes.size() == static_cast<size_t>(s.tellg()));

  // Stream-deserialized vs. bytes-deserialized: previously only is_empty()
  // was compared, leaving the stream path essentially unverified here.
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
  REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
  REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
  REQUIRE(sketch2.get_n() == sketch1.get_n());
  REQUIRE(sketch2.get_min_value() == sketch1.get_min_value());
  REQUIRE(sketch2.get_max_value() == sketch1.get_max_value());

  // Bytes-deserialized vs. the source sketch (the original checks).
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
  REQUIRE(sketch2.get_n() == sketch.get_n());
  REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
  REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
}
|
|
351
|
+
|
|
352
|
+
// Deserializes the "req_float_empty_from_java.sk" fixture and checks that it
// yields an empty sketch with NaN min and max values.
TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
  std::ifstream input;
  // Enable exceptions before open() so that a failed open throws.
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(input_path + "req_float_empty_from_java.sk", std::ios::binary);

  auto loaded = req_sketch<float>::deserialize(input);
  REQUIRE(loaded.is_empty());
  REQUIRE_FALSE(loaded.is_estimation_mode());
  REQUIRE(loaded.get_n() == 0);
  REQUIRE(loaded.get_num_retained() == 0);
  REQUIRE(std::isnan(loaded.get_min_value()));
  REQUIRE(std::isnan(loaded.get_max_value()));
}
|
|
364
|
+
|
|
365
|
+
// Deserializes the "req_float_single_item_from_java.sk" fixture and checks
// the counts, min/max, and both rank variants for the value 1.
TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch]") {
  std::ifstream input;
  // Enable exceptions before open() so that a failed open throws.
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(input_path + "req_float_single_item_from_java.sk", std::ios::binary);

  auto loaded = req_sketch<float>::deserialize(input);
  REQUIRE_FALSE(loaded.is_empty());
  REQUIRE_FALSE(loaded.is_estimation_mode());
  REQUIRE(loaded.get_n() == 1);
  REQUIRE(loaded.get_num_retained() == 1);
  REQUIRE(loaded.get_min_value() == 1);
  REQUIRE(loaded.get_max_value() == 1);
  // Default rank of the only item is 0; the <true> variant yields 1
  // (presumably the inclusive-rank flag -- confirm against req_sketch).
  REQUIRE(loaded.get_rank(1) == 0);
  REQUIRE(loaded.get_rank<true>(1) == 1);
}
|
|
379
|
+
|
|
380
|
+
// Deserializes the "req_float_raw_items_from_java.sk" fixture and checks that
// it holds four items spanning 0..3 with rank(2) == 0.5.
TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
  std::ifstream input;
  // Enable exceptions before open() so that a failed open throws.
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(input_path + "req_float_raw_items_from_java.sk", std::ios::binary);

  auto loaded = req_sketch<float>::deserialize(input);
  REQUIRE_FALSE(loaded.is_empty());
  REQUIRE_FALSE(loaded.is_estimation_mode());
  REQUIRE(loaded.get_n() == 4);
  REQUIRE(loaded.get_num_retained() == 4);
  REQUIRE(loaded.get_min_value() == 0);
  REQUIRE(loaded.get_max_value() == 3);
  REQUIRE(loaded.get_rank(2) == 0.5);
}
|
|
393
|
+
|
|
394
|
+
// Deserializes the "req_float_exact_from_java.sk" fixture and checks that it
// holds 100 retained items spanning 0..99 with rank(50) == 0.5.
TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
  std::ifstream input;
  // Enable exceptions before open() so that a failed open throws.
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(input_path + "req_float_exact_from_java.sk", std::ios::binary);

  auto loaded = req_sketch<float>::deserialize(input);
  REQUIRE_FALSE(loaded.is_empty());
  REQUIRE_FALSE(loaded.is_estimation_mode());
  REQUIRE(loaded.get_n() == 100);
  REQUIRE(loaded.get_num_retained() == 100);
  REQUIRE(loaded.get_min_value() == 0);
  REQUIRE(loaded.get_max_value() == 99);
  REQUIRE(loaded.get_rank(50) == 0.5);
}
|
|
407
|
+
|
|
408
|
+
// Deserializes the "req_float_estimation_from_java.sk" fixture and checks
// that it is in estimation mode with n == 10000, 2942 retained items, values
// spanning 0..9999, and rank(5000) == 0.5.
TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
  std::ifstream input;
  // Enable exceptions before open() so that a failed open throws.
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(input_path + "req_float_estimation_from_java.sk", std::ios::binary);

  auto loaded = req_sketch<float>::deserialize(input);
  REQUIRE_FALSE(loaded.is_empty());
  REQUIRE(loaded.is_estimation_mode());
  REQUIRE(loaded.get_n() == 10000);
  REQUIRE(loaded.get_num_retained() == 2942);
  REQUIRE(loaded.get_min_value() == 0);
  REQUIRE(loaded.get_max_value() == 9999);
  REQUIRE(loaded.get_rank(5000) == 0.5);
}
|
|
421
|
+
|
|
422
|
+
// Merges a sketch holding the values 0..999 into an empty sketch and checks
// min/max exactly and the quartiles and rank(500) approximately.
TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
  req_sketch<float> target(40);

  req_sketch<float> source(40);
  for (size_t value = 0; value < 1000; ++value) {
    source.update(value);
  }

  target.merge(source);
  REQUIRE(target.get_min_value() == 0);
  REQUIRE(target.get_max_value() == 999);
  REQUIRE(target.get_quantile(0.25) == Approx(250).margin(3));
  REQUIRE(target.get_quantile(0.5) == Approx(500).margin(3));
  REQUIRE(target.get_quantile(0.75) == Approx(750).margin(3));
  REQUIRE(target.get_rank(500) == Approx(0.5).margin(0.01));
}
|
|
436
|
+
|
|
437
|
+
// Merges a sketch of 1000..1999 into a sketch of 0..999 and checks min/max
// exactly and the quartiles and rank(1000) approximately.
TEST_CASE("req sketch: merge", "[req_sketch]") {
  req_sketch<float> lower(100);
  for (size_t value = 0; value < 1000; ++value) {
    lower.update(value);
  }

  req_sketch<float> upper(100);
  for (size_t value = 1000; value < 2000; ++value) {
    upper.update(value);
  }

  lower.merge(upper);
  REQUIRE(lower.get_min_value() == 0);
  REQUIRE(lower.get_max_value() == 1999);
  REQUIRE(lower.get_quantile(0.25) == Approx(500).margin(3));
  REQUIRE(lower.get_quantile(0.5) == Approx(1000).margin(1));
  REQUIRE(lower.get_quantile(0.75) == Approx(1500).margin(1));
  REQUIRE(lower.get_rank(1000) == Approx(0.5).margin(0.01));
}
|
|
452
|
+
|
|
453
|
+
// Merges three 40-item sketches covering 0..39, 40..79 and 80..119 into an
// empty sketch, one after another, and checks min/max exactly and the median
// and rank(60) approximately.
TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
  req_sketch<float> first(12);
  for (size_t value = 0; value < 40; ++value) {
    first.update(value);
  }

  req_sketch<float> second(12);
  for (size_t value = 40; value < 80; ++value) {
    second.update(value);
  }

  req_sketch<float> third(12);
  for (size_t value = 80; value < 120; ++value) {
    third.update(value);
  }

  req_sketch<float> combined(12);
  combined.merge(first);
  combined.merge(second);
  combined.merge(third);
  REQUIRE(combined.get_min_value() == 0);
  REQUIRE(combined.get_max_value() == 119);
  REQUIRE(combined.get_quantile(0.5) == Approx(60).margin(3));
  REQUIRE(combined.get_rank(60) == Approx(0.5).margin(0.01));
}
|
|
472
|
+
|
|
473
|
+
// Merging two sketches built with different second constructor arguments
// must throw std::invalid_argument. Going by the test name, that argument
// presumably selects high- vs. low-rank accuracy -- confirm against the
// req_sketch constructor.
TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
  req_sketch<float> default_mode(12);
  default_mode.update(1);

  req_sketch<float> other_mode(12, false);
  other_mode.update(1);

  REQUIRE_THROWS_AS(default_mode.merge(other_mode), std::invalid_argument);
}
|
|
482
|
+
|
|
483
|
+
//TEST_CASE("for manual comparison with Java") {
|
|
484
|
+
// req_sketch<float> sketch(12, false);
|
|
485
|
+
// for (size_t i = 0; i < 100000; ++i) sketch.update(i);
|
|
486
|
+
// sketch.merge(sketch);
|
|
487
|
+
// std::ofstream os;
|
|
488
|
+
// os.exceptions(std::ios::failbit | std::ios::badbit);
|
|
489
|
+
// os.open("req_float_lra_12_100000_merged.sk", std::ios::binary);
|
|
490
|
+
// sketch.get_quantile(0.5); // force sorting level 0
|
|
491
|
+
// sketch.serialize(os);
|
|
492
|
+
//}
|
|
493
|
+
|
|
494
|
+
} /* namespace datasketches */
|