datasketches 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -0,0 +1,43 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Test executable for the REQ sketch; its sources are attached right below.
add_executable(req_test)

target_sources(req_test
  PRIVATE
    req_sketch_test.cpp
    req_sketch_custom_type_test.cpp
)

target_link_libraries(req_test req common_test)

set_target_properties(req_test PROPERTIES
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED YES
)

# Hand this directory (CMake-style path plus a trailing "/") to the test
# sources through the TEST_BINARY_INPUT_PATH compile definition.
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" REQ_TEST_BINARY_PATH)
string(APPEND REQ_TEST_BINARY_PATH "/")
target_compile_definitions(req_test
  PRIVATE
    TEST_BINARY_INPUT_PATH="${REQ_TEST_BINARY_PATH}"
)

# Register the executable with CTest.
add_test(
  NAME req_test
  COMMAND req_test
)
@@ -0,0 +1,128 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+ #include <sstream>
22
+
23
+ #include <req_sketch.hpp>
24
+ #include <test_allocator.hpp>
25
+ #include <test_type.hpp>
26
+
27
+ namespace datasketches {
28
+
29
+ using req_test_type_sketch = req_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>>;
30
+ using alloc = test_allocator<test_type>;
31
+
32
+ TEST_CASE("req sketch custom type", "[req_sketch]") {
33
+
34
+ // setup section
35
+ test_allocator_total_bytes = 0;
36
+
37
+ SECTION("compact level zero") {
38
+ req_test_type_sketch sketch(4, true, 0);
39
+ REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
40
+ REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
41
+ REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
42
+ REQUIRE(sketch.get_serialized_size_bytes() == 8);
43
+
44
+ for (int i = 0; i < 24; ++i) sketch.update(i);
45
+ //std::cout << sketch.to_string(true);
46
+
47
+ REQUIRE(sketch.is_estimation_mode());
48
+ REQUIRE(sketch.get_n() > sketch.get_num_retained());
49
+ REQUIRE(sketch.get_min_value().get_value() == 0);
50
+ REQUIRE(sketch.get_max_value().get_value() == 23);
51
+ }
52
+
53
+ SECTION("merge small") {
54
+ req_test_type_sketch sketch1(4, true, 0);
55
+ sketch1.update(1);
56
+
57
+ req_test_type_sketch sketch2(4, true, 0);
58
+ sketch2.update(2);
59
+
60
+ sketch2.merge(sketch1);
61
+
62
+ //std::cout << sketch2.to_string(true);
63
+
64
+ REQUIRE_FALSE(sketch2.is_estimation_mode());
65
+ REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
66
+ REQUIRE(sketch2.get_min_value().get_value() == 1);
67
+ REQUIRE(sketch2.get_max_value().get_value() == 2);
68
+ }
69
+
70
+ SECTION("merge higher levels") {
71
+ req_test_type_sketch sketch1(4, true, 0);
72
+ for (int i = 0; i < 24; ++i) sketch1.update(i);
73
+
74
+ req_test_type_sketch sketch2(4, true, 0);
75
+ for (int i = 0; i < 24; ++i) sketch2.update(i);
76
+
77
+ sketch2.merge(sketch1);
78
+
79
+ //std::cout << sketch2.to_string(true);
80
+
81
+ REQUIRE(sketch2.is_estimation_mode());
82
+ REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
83
+ REQUIRE(sketch2.get_min_value().get_value() == 0);
84
+ REQUIRE(sketch2.get_max_value().get_value() == 23);
85
+ }
86
+
87
+ SECTION("serialize deserialize") {
88
+ req_test_type_sketch sketch1(12, true, 0);
89
+
90
+ const int n = 1000;
91
+ for (int i = 0; i < n; i++) sketch1.update(i);
92
+
93
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
94
+ sketch1.serialize(s);
95
+ REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
96
+ auto sketch2 = req_test_type_sketch::deserialize(s, alloc(0));
97
+ REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
98
+ REQUIRE(s.tellg() == s.tellp());
99
+ REQUIRE(sketch2.is_empty() == sketch1.is_empty());
100
+ REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
101
+ REQUIRE(sketch2.get_n() == sketch1.get_n());
102
+ REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
103
+ REQUIRE(sketch2.get_min_value().get_value() == sketch1.get_min_value().get_value());
104
+ REQUIRE(sketch2.get_max_value().get_value() == sketch1.get_max_value().get_value());
105
+ REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
106
+ REQUIRE(sketch2.get_rank(0) == sketch1.get_rank(0));
107
+ REQUIRE(sketch2.get_rank(n) == sketch1.get_rank(n));
108
+ REQUIRE(sketch2.get_rank(n / 2) == sketch1.get_rank(n / 2));
109
+ }
110
+
111
+ SECTION("moving merge") {
112
+ req_test_type_sketch sketch1(4, true, 0);
113
+ for (int i = 0; i < 10; i++) sketch1.update(i);
114
+ req_test_type_sketch sketch2(4, true, 0);
115
+ sketch2.update(10);
116
+ sketch2.merge(std::move(sketch1));
117
+ REQUIRE(sketch2.get_min_value().get_value() == 0);
118
+ REQUIRE(sketch2.get_max_value().get_value() == 10);
119
+ REQUIRE(sketch2.get_n() == 11);
120
+ }
121
+
122
+ // cleanup
123
+ if (test_allocator_total_bytes != 0) {
124
+ REQUIRE(test_allocator_total_bytes == 0);
125
+ }
126
+ }
127
+
128
+ } /* namespace datasketches */
@@ -0,0 +1,494 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+
22
+ #include <req_sketch.hpp>
23
+
24
+ #include <fstream>
25
+ #include <sstream>
26
+ #include <limits>
27
+
28
+ namespace datasketches {
29
+
30
// Directory prefix for the binary test inputs: taken from the
// TEST_BINARY_INPUT_PATH macro when it is defined, "test/" otherwise.
#ifndef TEST_BINARY_INPUT_PATH
const std::string input_path = "test/";
#else
const std::string input_path = TEST_BINARY_INPUT_PATH;
#endif
35
+
36
+ TEST_CASE("req sketch: empty", "[req_sketch]") {
37
+ std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";
38
+ req_sketch<float> sketch(12);
39
+ REQUIRE(sketch.get_k() == 12);
40
+ REQUIRE(sketch.is_HRA());
41
+ REQUIRE(sketch.is_empty());
42
+ REQUIRE_FALSE(sketch.is_estimation_mode());
43
+ REQUIRE(sketch.get_n() == 0);
44
+ REQUIRE(sketch.get_num_retained() == 0);
45
+ REQUIRE(std::isnan(sketch.get_rank(0)));
46
+ REQUIRE(std::isnan(sketch.get_rank(std::numeric_limits<float>::infinity())));
47
+ REQUIRE(std::isnan(sketch.get_min_value()));
48
+ REQUIRE(std::isnan(sketch.get_max_value()));
49
+ REQUIRE(std::isnan(sketch.get_quantile(0)));
50
+ REQUIRE(std::isnan(sketch.get_quantile(0.5)));
51
+ REQUIRE(std::isnan(sketch.get_quantile(1)));
52
+ const double ranks[3] {0, 0.5, 1};
53
+ REQUIRE(sketch.get_quantiles(ranks, 3).size() == 0);
54
+ }
55
+
56
+ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
57
+ req_sketch<float> sketch(12, false);
58
+ sketch.update(1);
59
+ REQUIRE_FALSE(sketch.is_HRA());
60
+ REQUIRE_FALSE(sketch.is_empty());
61
+ REQUIRE_FALSE(sketch.is_estimation_mode());
62
+ REQUIRE(sketch.get_n() == 1);
63
+ REQUIRE(sketch.get_num_retained() == 1);
64
+ REQUIRE(sketch.get_rank(1) == 0);
65
+ REQUIRE(sketch.get_rank<true>(1) == 1);
66
+ REQUIRE(sketch.get_rank(1.1) == 1);
67
+ REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
68
+ REQUIRE(sketch.get_quantile(0) == 1);
69
+ REQUIRE(sketch.get_quantile(0.5) == 1);
70
+ REQUIRE(sketch.get_quantile(1) == 1);
71
+
72
+ const double ranks[3] {0, 0.5, 1};
73
+ auto quantiles = sketch.get_quantiles(ranks, 3);
74
+ REQUIRE(quantiles.size() == 3);
75
+ REQUIRE(quantiles[0] == 1);
76
+ REQUIRE(quantiles[1] == 1);
77
+ REQUIRE(quantiles[2] == 1);
78
+
79
+ unsigned count = 0;
80
+ for (auto it: sketch) {
81
+ REQUIRE(it.second == 1);
82
+ ++count;
83
+ }
84
+ REQUIRE(count == 1);
85
+ }
86
+
87
+ TEST_CASE("req sketch: repeated values", "[req_sketch]") {
88
+ req_sketch<float> sketch(12);
89
+ sketch.update(1);
90
+ sketch.update(1);
91
+ sketch.update(1);
92
+ sketch.update(2);
93
+ sketch.update(2);
94
+ sketch.update(2);
95
+ REQUIRE_FALSE(sketch.is_empty());
96
+ REQUIRE_FALSE(sketch.is_estimation_mode());
97
+ REQUIRE(sketch.get_n() == 6);
98
+ REQUIRE(sketch.get_num_retained() == 6);
99
+ REQUIRE(sketch.get_rank(1) == 0);
100
+ REQUIRE(sketch.get_rank<true>(1) == 0.5);
101
+ REQUIRE(sketch.get_rank(2) == 0.5);
102
+ REQUIRE(sketch.get_rank<true>(2) == 1);
103
+ }
104
+
105
+ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
106
+ req_sketch<float> sketch(12);
107
+ for (size_t i = 1; i <= 10; ++i) sketch.update(i);
108
+ REQUIRE_FALSE(sketch.is_empty());
109
+ REQUIRE_FALSE(sketch.is_estimation_mode());
110
+ REQUIRE(sketch.get_n() == 10);
111
+ REQUIRE(sketch.get_num_retained() == 10);
112
+
113
+ // like KLL
114
+ REQUIRE(sketch.get_rank(1) == 0);
115
+ REQUIRE(sketch.get_rank(2) == 0.1);
116
+ REQUIRE(sketch.get_rank(6) == 0.5);
117
+ REQUIRE(sketch.get_rank(9) == 0.8);
118
+ REQUIRE(sketch.get_rank(10) == 0.9);
119
+
120
+ // inclusive
121
+ REQUIRE(sketch.get_rank<true>(1) == 0.1);
122
+ REQUIRE(sketch.get_rank<true>(2) == 0.2);
123
+ REQUIRE(sketch.get_rank<true>(5) == 0.5);
124
+ REQUIRE(sketch.get_rank<true>(9) == 0.9);
125
+ REQUIRE(sketch.get_rank<true>(10) == 1);
126
+
127
+ // like KLL
128
+ REQUIRE(sketch.get_quantile(0) == 1);
129
+ REQUIRE(sketch.get_quantile(0.1) == 2);
130
+ REQUIRE(sketch.get_quantile(0.5) == 6);
131
+ REQUIRE(sketch.get_quantile(0.9) == 10);
132
+ REQUIRE(sketch.get_quantile(1) == 10);
133
+
134
+ // inclusive
135
+ REQUIRE(sketch.get_quantile<true>(0) == 1);
136
+ REQUIRE(sketch.get_quantile<true>(0.1) == 1);
137
+ REQUIRE(sketch.get_quantile<true>(0.5) == 5);
138
+ REQUIRE(sketch.get_quantile<true>(0.9) == 9);
139
+ REQUIRE(sketch.get_quantile<true>(1) == 10);
140
+
141
+ const double ranks[3] {0, 0.5, 1};
142
+ auto quantiles = sketch.get_quantiles(ranks, 3);
143
+ REQUIRE(quantiles.size() == 3);
144
+ REQUIRE(quantiles[0] == 1);
145
+ REQUIRE(quantiles[1] == 6);
146
+ REQUIRE(quantiles[2] == 10);
147
+
148
+ const float splits[3] {2, 6, 9};
149
+ auto cdf = sketch.get_CDF(splits, 3);
150
+ REQUIRE(cdf[0] == 0.1);
151
+ REQUIRE(cdf[1] == 0.5);
152
+ REQUIRE(cdf[2] == 0.8);
153
+ REQUIRE(cdf[3] == 1);
154
+ auto pmf = sketch.get_PMF(splits, 3);
155
+ REQUIRE(pmf[0] == Approx(0.1).margin(1e-8));
156
+ REQUIRE(pmf[1] == Approx(0.4).margin(1e-8));
157
+ REQUIRE(pmf[2] == Approx(0.3).margin(1e-8));
158
+ REQUIRE(pmf[3] == Approx(0.2).margin(1e-8));
159
+
160
+ REQUIRE(sketch.get_rank_lower_bound(0.5, 1) == 0.5);
161
+ REQUIRE(sketch.get_rank_upper_bound(0.5, 1) == 0.5);
162
+ }
163
+
164
+ TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
165
+ req_sketch<float> sketch(12);
166
+ const size_t n = 100000;
167
+ for (size_t i = 0; i < n; ++i) sketch.update(i);
168
+ REQUIRE_FALSE(sketch.is_empty());
169
+ REQUIRE(sketch.is_estimation_mode());
170
+ REQUIRE(sketch.get_n() == n);
171
+ // std::cout << sketch.to_string(true);
172
+ REQUIRE(sketch.get_num_retained() < n);
173
+ REQUIRE(sketch.get_rank(0) == 0);
174
+ REQUIRE(sketch.get_rank(n) == 1);
175
+ REQUIRE(sketch.get_rank(n / 2) == Approx(0.5).margin(0.01));
176
+ REQUIRE(sketch.get_rank(n - 1) == Approx(1).margin(0.01));
177
+ REQUIRE(sketch.get_min_value() == 0);
178
+ REQUIRE(sketch.get_max_value() == n - 1);
179
+ REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
180
+ REQUIRE(sketch.get_rank_upper_bound(0.5, 1) > 0.5);
181
+
182
+ unsigned count = 0;
183
+ for (auto it: sketch) {
184
+ REQUIRE(it.second >= 1);
185
+ ++count;
186
+ }
187
+ REQUIRE(count == sketch.get_num_retained());
188
+ }
189
+
190
+ TEST_CASE("req sketch: stream serialize-deserialize empty", "[req_sketch]") {
191
+ req_sketch<float> sketch(12);
192
+
193
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
194
+ sketch.serialize(s);
195
+ auto sketch2 = req_sketch<float>::deserialize(s);
196
+ REQUIRE(s.tellg() == s.tellp());
197
+ REQUIRE(sketch2.is_empty() == sketch.is_empty());
198
+ REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
199
+ REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
200
+ REQUIRE(sketch2.get_n() == sketch.get_n());
201
+ REQUIRE(std::isnan(sketch2.get_min_value()));
202
+ REQUIRE(std::isnan(sketch2.get_max_value()));
203
+ }
204
+
205
+ TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
206
+ req_sketch<float> sketch(12);
207
+
208
+ auto bytes = sketch.serialize();
209
+ REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
210
+ auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
211
+ REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
212
+ REQUIRE(sketch2.is_empty() == sketch.is_empty());
213
+ REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
214
+ REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
215
+ REQUIRE(sketch2.get_n() == sketch.get_n());
216
+ REQUIRE(std::isnan(sketch2.get_min_value()));
217
+ REQUIRE(std::isnan(sketch2.get_max_value()));
218
+ }
219
+
220
+ TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
221
+ req_sketch<float> sketch(12);
222
+ sketch.update(1);
223
+
224
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
225
+ sketch.serialize(s);
226
+ auto sketch2 = req_sketch<float>::deserialize(s);
227
+ REQUIRE(s.tellg() == s.tellp());
228
+ REQUIRE(sketch2.is_empty() == sketch.is_empty());
229
+ REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
230
+ REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
231
+ REQUIRE(sketch2.get_n() == sketch.get_n());
232
+ REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
233
+ REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
234
+ }
235
+
236
+ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
237
+ req_sketch<float> sketch(12);
238
+ sketch.update(1);
239
+
240
+ auto bytes = sketch.serialize();
241
+ REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
242
+ auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
243
+ std::cout << sketch2.to_string(true);
244
+ REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
245
+ REQUIRE(sketch2.is_empty() == sketch.is_empty());
246
+ REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
247
+ REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
248
+ REQUIRE(sketch2.get_n() == sketch.get_n());
249
+ REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
250
+ REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
251
+ }
252
+
253
+ TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
254
+ req_sketch<float> sketch(12);
255
+ const size_t n = 50;
256
+ for (size_t i = 0; i < n; ++i) sketch.update(i);
257
+ REQUIRE_FALSE(sketch.is_estimation_mode());
258
+
259
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
260
+ sketch.serialize(s);
261
+ auto sketch2 = req_sketch<float>::deserialize(s);
262
+ REQUIRE(s.tellg() == s.tellp());
263
+ REQUIRE(sketch2.is_empty() == sketch.is_empty());
264
+ REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
265
+ REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
266
+ REQUIRE(sketch2.get_n() == sketch.get_n());
267
+ REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
268
+ REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
269
+ }
270
+
271
+ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
272
+ req_sketch<float> sketch(12);
273
+ const size_t n = 50;
274
+ for (size_t i = 0; i < n; ++i) sketch.update(i);
275
+ REQUIRE_FALSE(sketch.is_estimation_mode());
276
+
277
+ auto bytes = sketch.serialize();
278
+ REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
279
+ auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
280
+ std::cout << sketch2.to_string(true);
281
+ REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
282
+ REQUIRE(sketch2.is_empty() == sketch.is_empty());
283
+ REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
284
+ REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
285
+ REQUIRE(sketch2.get_n() == sketch.get_n());
286
+ REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
287
+ REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
288
+ }
289
+
290
+ TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
291
+ req_sketch<float> sketch(12);
292
+ const size_t n = 100000;
293
+ for (size_t i = 0; i < n; ++i) sketch.update(i);
294
+ REQUIRE(sketch.is_estimation_mode());
295
+
296
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
297
+ sketch.serialize(s);
298
+ auto sketch2 = req_sketch<float>::deserialize(s);
299
+ REQUIRE(s.tellg() == s.tellp());
300
+ REQUIRE(sketch2.is_empty() == sketch.is_empty());
301
+ REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
302
+ REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
303
+ REQUIRE(sketch2.get_n() == sketch.get_n());
304
+ REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
305
+ REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
306
+ }
307
+
308
+ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
309
+ req_sketch<float> sketch(12);
310
+ const size_t n = 100000;
311
+ for (size_t i = 0; i < n; ++i) sketch.update(i);
312
+ REQUIRE(sketch.is_estimation_mode());
313
+
314
+ auto bytes = sketch.serialize();
315
+ REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
316
+ auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
317
+ REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
318
+ REQUIRE(sketch2.is_empty() == sketch.is_empty());
319
+ REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
320
+ REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
321
+ REQUIRE(sketch2.get_n() == sketch.get_n());
322
+ REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
323
+ REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
324
+ }
325
+
326
+ TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
327
+ req_sketch<float> sketch(12);
328
+ const size_t n = 100000;
329
+ for (size_t i = 0; i < n; ++i) sketch.update(i);
330
+ REQUIRE(sketch.is_estimation_mode());
331
+
332
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
333
+ sketch.serialize(s);
334
+ auto bytes = sketch.serialize();
335
+ REQUIRE(bytes.size() == static_cast<size_t>(s.tellp()));
336
+ for (size_t i = 0; i < bytes.size(); ++i) {
337
+ REQUIRE(((char*)bytes.data())[i] == (char)s.get());
338
+ }
339
+
340
+ s.seekg(0); // rewind
341
+ auto sketch1 = req_sketch<float>::deserialize(s);
342
+ auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
343
+ REQUIRE(bytes.size() == static_cast<size_t>(s.tellg()));
344
+ REQUIRE(sketch2.is_empty() == sketch1.is_empty());
345
+ REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
346
+ REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
347
+ REQUIRE(sketch2.get_n() == sketch.get_n());
348
+ REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
349
+ REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
350
+ }
351
+
352
+ TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
353
+ std::ifstream is;
354
+ is.exceptions(std::ios::failbit | std::ios::badbit);
355
+ is.open(input_path + "req_float_empty_from_java.sk", std::ios::binary);
356
+ auto sketch = req_sketch<float>::deserialize(is);
357
+ REQUIRE(sketch.is_empty());
358
+ REQUIRE_FALSE(sketch.is_estimation_mode());
359
+ REQUIRE(sketch.get_n() == 0);
360
+ REQUIRE(sketch.get_num_retained() == 0);
361
+ REQUIRE(std::isnan(sketch.get_min_value()));
362
+ REQUIRE(std::isnan(sketch.get_max_value()));
363
+ }
364
+
365
+ TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch]") {
366
+ std::ifstream is;
367
+ is.exceptions(std::ios::failbit | std::ios::badbit);
368
+ is.open(input_path + "req_float_single_item_from_java.sk", std::ios::binary);
369
+ auto sketch = req_sketch<float>::deserialize(is);
370
+ REQUIRE_FALSE(sketch.is_empty());
371
+ REQUIRE_FALSE(sketch.is_estimation_mode());
372
+ REQUIRE(sketch.get_n() == 1);
373
+ REQUIRE(sketch.get_num_retained() == 1);
374
+ REQUIRE(sketch.get_min_value() == 1);
375
+ REQUIRE(sketch.get_max_value() == 1);
376
+ REQUIRE(sketch.get_rank(1) == 0);
377
+ REQUIRE(sketch.get_rank<true>(1) == 1);
378
+ }
379
+
380
+ TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
381
+ std::ifstream is;
382
+ is.exceptions(std::ios::failbit | std::ios::badbit);
383
+ is.open(input_path + "req_float_raw_items_from_java.sk", std::ios::binary);
384
+ auto sketch = req_sketch<float>::deserialize(is);
385
+ REQUIRE_FALSE(sketch.is_empty());
386
+ REQUIRE_FALSE(sketch.is_estimation_mode());
387
+ REQUIRE(sketch.get_n() == 4);
388
+ REQUIRE(sketch.get_num_retained() == 4);
389
+ REQUIRE(sketch.get_min_value() == 0);
390
+ REQUIRE(sketch.get_max_value() == 3);
391
+ REQUIRE(sketch.get_rank(2) == 0.5);
392
+ }
393
+
394
+ TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
395
+ std::ifstream is;
396
+ is.exceptions(std::ios::failbit | std::ios::badbit);
397
+ is.open(input_path + "req_float_exact_from_java.sk", std::ios::binary);
398
+ auto sketch = req_sketch<float>::deserialize(is);
399
+ REQUIRE_FALSE(sketch.is_empty());
400
+ REQUIRE_FALSE(sketch.is_estimation_mode());
401
+ REQUIRE(sketch.get_n() == 100);
402
+ REQUIRE(sketch.get_num_retained() == 100);
403
+ REQUIRE(sketch.get_min_value() == 0);
404
+ REQUIRE(sketch.get_max_value() == 99);
405
+ REQUIRE(sketch.get_rank(50) == 0.5);
406
+ }
407
+
408
+ TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
409
+ std::ifstream is;
410
+ is.exceptions(std::ios::failbit | std::ios::badbit);
411
+ is.open(input_path + "req_float_estimation_from_java.sk", std::ios::binary);
412
+ auto sketch = req_sketch<float>::deserialize(is);
413
+ REQUIRE_FALSE(sketch.is_empty());
414
+ REQUIRE(sketch.is_estimation_mode());
415
+ REQUIRE(sketch.get_n() == 10000);
416
+ REQUIRE(sketch.get_num_retained() == 2942);
417
+ REQUIRE(sketch.get_min_value() == 0);
418
+ REQUIRE(sketch.get_max_value() == 9999);
419
+ REQUIRE(sketch.get_rank(5000) == 0.5);
420
+ }
421
+
422
+ TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
423
+ req_sketch<float> sketch1(40);
424
+
425
+ req_sketch<float> sketch2(40);
426
+ for (size_t i = 0; i < 1000; ++i) sketch2.update(i);
427
+
428
+ sketch1.merge(sketch2);
429
+ REQUIRE(sketch1.get_min_value() == 0);
430
+ REQUIRE(sketch1.get_max_value() == 999);
431
+ REQUIRE(sketch1.get_quantile(0.25) == Approx(250).margin(3));
432
+ REQUIRE(sketch1.get_quantile(0.5) == Approx(500).margin(3));
433
+ REQUIRE(sketch1.get_quantile(0.75) == Approx(750).margin(3));
434
+ REQUIRE(sketch1.get_rank(500) == Approx(0.5).margin(0.01));
435
+ }
436
+
437
+ TEST_CASE("req sketch: merge", "[req_sketch]") {
438
+ req_sketch<float> sketch1(100);
439
+ for (size_t i = 0; i < 1000; ++i) sketch1.update(i);
440
+
441
+ req_sketch<float> sketch2(100);
442
+ for (size_t i = 1000; i < 2000; ++i) sketch2.update(i);
443
+
444
+ sketch1.merge(sketch2);
445
+ REQUIRE(sketch1.get_min_value() == 0);
446
+ REQUIRE(sketch1.get_max_value() == 1999);
447
+ REQUIRE(sketch1.get_quantile(0.25) == Approx(500).margin(3));
448
+ REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).margin(1));
449
+ REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).margin(1));
450
+ REQUIRE(sketch1.get_rank(1000) == Approx(0.5).margin(0.01));
451
+ }
452
+
453
+ TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
454
+ req_sketch<float> sketch1(12);
455
+ for (size_t i = 0; i < 40; ++i) sketch1.update(i);
456
+
457
+ req_sketch<float> sketch2(12);
458
+ for (size_t i = 40; i < 80; ++i) sketch2.update(i);
459
+
460
+ req_sketch<float> sketch3(12);
461
+ for (size_t i = 80; i < 120; ++i) sketch3.update(i);
462
+
463
+ req_sketch<float> sketch(12);
464
+ sketch.merge(sketch1);
465
+ sketch.merge(sketch2);
466
+ sketch.merge(sketch3);
467
+ REQUIRE(sketch.get_min_value() == 0);
468
+ REQUIRE(sketch.get_max_value() == 119);
469
+ REQUIRE(sketch.get_quantile(0.5) == Approx(60).margin(3));
470
+ REQUIRE(sketch.get_rank(60) == Approx(0.5).margin(0.01));
471
+ }
472
+
473
+ TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
474
+ req_sketch<float> sketch1(12);
475
+ sketch1.update(1);
476
+
477
+ req_sketch<float> sketch2(12, false);
478
+ sketch2.update(1);
479
+
480
+ REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
481
+ }
482
+
483
+ //TEST_CASE("for manual comparison with Java") {
484
+ // req_sketch<float> sketch(12, false);
485
+ // for (size_t i = 0; i < 100000; ++i) sketch.update(i);
486
+ // sketch.merge(sketch);
487
+ // std::ofstream os;
488
+ // os.exceptions(std::ios::failbit | std::ios::badbit);
489
+ // os.open("req_float_lra_12_100000_merged.sk", std::ios::binary);
490
+ // sketch.get_quantile(0.5); // force sorting level 0
491
+ // sketch.serialize(os);
492
+ //}
493
+
494
+ } /* namespace datasketches */