datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -0,0 +1,266 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <ebpps_sketch.hpp>
21
+
22
+ #include <catch2/catch.hpp>
23
+
24
+ #include <vector>
25
+ #include <string>
26
+ #include <sstream>
27
+ #include <fstream>
28
+ #include <cmath>
29
+ #include <random>
30
+ #include <stdexcept>
31
+
32
+ #ifdef TEST_BINARY_INPUT_PATH
33
+ static std::string testBinaryInputPath = TEST_BINARY_INPUT_PATH;
34
+ #else
35
+ static std::string testBinaryInputPath = "test/";
36
+ #endif
37
+
38
+ namespace datasketches {
39
+
40
+ static constexpr double EPS = 1e-13;
41
+
42
+ static ebpps_sketch<int> create_unweighted_sketch(uint32_t k, uint64_t n) {
43
+ ebpps_sketch<int> sk(k);
44
+ for (uint64_t i = 0; i < n; ++i) {
45
+ sk.update(static_cast<int>(i), 1.0);
46
+ }
47
+ return sk;
48
+ }
49
+
50
+ template<typename T, typename A>
51
+ static void check_if_equal(ebpps_sketch<T, A>& sk1, ebpps_sketch<T, A>& sk2) {
52
+ REQUIRE(sk1.get_k() == sk2.get_k());
53
+ REQUIRE(sk1.get_n() == sk2.get_n());
54
+ REQUIRE(sk1.get_c() == sk2.get_c());
55
+ REQUIRE(sk1.get_cumulative_weight() == sk2.get_cumulative_weight());
56
+
57
+ auto it1 = sk1.begin();
58
+ auto it2 = sk2.begin();
59
+ size_t count = 0;
60
+
61
+ while ((it1 != sk1.end()) && (it2 != sk2.end())) {
62
+ REQUIRE(*it1 == *it2);
63
+ ++it1;
64
+ ++it2;
65
+ ++count;
66
+ }
67
+
68
+ REQUIRE(((count == std::floor(sk1.get_c())) || (count == std::ceil(sk1.get_c()))));
69
+
70
+ // if c != floor(c) one sketch may not have reached the end,
71
+ // but that's not testable from the external API
72
+ }
73
+
74
+ TEST_CASE("ebpps sketch: invalid k", "[ebpps_sketch]") {
75
+ REQUIRE_THROWS_AS(ebpps_sketch<int>(0), std::invalid_argument);
76
+ REQUIRE_THROWS_AS(ebpps_sketch<int>(ebpps_constants::MAX_K + 1), std::invalid_argument);
77
+ }
78
+
79
+ TEST_CASE("ebpps sketch: invalid weights", "[ebpps_sketch]") {
80
+ uint32_t k = 100;
81
+ ebpps_sketch<int> sk = create_unweighted_sketch(k, 3);
82
+ REQUIRE(sk.get_n() == 3);
83
+ REQUIRE(sk.get_cumulative_weight() == 3.0);
84
+ sk.update(-1, 0.0); // no-op
85
+ REQUIRE(sk.get_n() == 3);
86
+ REQUIRE(sk.get_cumulative_weight() == 3.0);
87
+
88
+ REQUIRE_THROWS_AS(sk.update(-2, -1.0), std::invalid_argument);
89
+
90
+ ebpps_sketch<float> sk2(k);
91
+ REQUIRE_THROWS_AS(sk2.update(-2, std::numeric_limits<float>::infinity()), std::invalid_argument);
92
+ REQUIRE_THROWS_AS(sk2.update(-2, nanf("")), std::invalid_argument);
93
+ }
94
+
95
+ TEST_CASE("ebpps sketch: insert items", "[ebpps_sketch]") {
96
+ size_t n = 0;
97
+ uint32_t k = 5;
98
+ ebpps_sketch<int> sk = create_unweighted_sketch(k, n);
99
+ REQUIRE(sk.get_allocator() == std::allocator<int>());
100
+ REQUIRE(sk.get_k() == k);
101
+ REQUIRE(sk.get_n() == 0);
102
+ REQUIRE(sk.get_c() == 0.0);
103
+ REQUIRE(sk.get_cumulative_weight() == 0.0);
104
+ REQUIRE(sk.is_empty());
105
+
106
+ n = k;
107
+ sk = create_unweighted_sketch(k, n);
108
+ REQUIRE_FALSE(sk.is_empty());
109
+ REQUIRE(sk.get_n() == n);
110
+ REQUIRE(sk.get_cumulative_weight() == static_cast<double>(n));
111
+ for (int val : sk.get_result())
112
+ REQUIRE(val < static_cast<int>(n));
113
+
114
+ n = k * 10;
115
+ sk = create_unweighted_sketch(k, n);
116
+ REQUIRE_FALSE(sk.is_empty());
117
+ REQUIRE(sk.get_n() == n);
118
+ REQUIRE(sk.get_cumulative_weight() == static_cast<double>(n));
119
+
120
+ auto result = sk.get_result();
121
+ REQUIRE(result.size() == sk.get_k()); // uniform weights so should be exactly k
122
+ for (int val : sk.get_result())
123
+ REQUIRE(val < static_cast<int>(n));
124
+ }
125
+
126
+ TEST_CASE("ebpps sketch: serialize/deserialize string", "[ebpps_sketch]") {
127
+ // since C <= k we don't have the usual sketch notion of exact vs estimation
128
+ // mode at any time. The only real serialization cases are empty and non-empty
129
+ // with and without a partial item
130
+ uint32_t k = 10;
131
+ ebpps_sketch<std::string> sk(k);
132
+
133
+ // empty
134
+ auto bytes = sk.serialize();
135
+ REQUIRE(bytes.size() == sk.get_serialized_size_bytes());
136
+ REQUIRE_THROWS_AS(ebpps_sketch<std::string>::deserialize(bytes.data(), bytes.size()-1), std::out_of_range);
137
+ auto sk_bytes = ebpps_sketch<std::string>::deserialize(bytes.data(), bytes.size());
138
+ check_if_equal(sk, sk_bytes);
139
+
140
+ std::stringstream ss(std::ios::in | std::ios::out | std::ios::binary);
141
+ sk.serialize(ss);
142
+ auto sk_stream = ebpps_sketch<std::string>::deserialize(ss);
143
+ check_if_equal(sk, sk_stream);
144
+ check_if_equal(sk_bytes, sk_stream); // should be redundant
145
+
146
+ for (uint32_t i = 0; i < k; ++i)
147
+ sk.update(std::to_string(i));
148
+
149
+ // non-empty, no partial item
150
+ bytes = sk.serialize();
151
+ REQUIRE(bytes.size() == sk.get_serialized_size_bytes());
152
+ REQUIRE_THROWS_AS(ebpps_sketch<std::string>::deserialize(bytes.data(), bytes.size()-1), std::out_of_range);
153
+ sk_bytes = ebpps_sketch<std::string>::deserialize(bytes.data(), bytes.size());
154
+ check_if_equal(sk, sk_bytes);
155
+
156
+ ss.str("");
157
+ sk.serialize(ss);
158
+ sk_stream = ebpps_sketch<std::string>::deserialize(ss);
159
+ check_if_equal(sk, sk_stream);
160
+ check_if_equal(sk_bytes, sk_stream); // should be redundant
161
+
162
+ // non-empty with partial item
163
+ sk.update(std::to_string(2 * k), 2.5);
164
+ REQUIRE(sk.get_cumulative_weight() == Approx(k + 2.5).margin(EPS));
165
+ bytes = sk.serialize();
166
+ REQUIRE(bytes.size() == sk.get_serialized_size_bytes());
167
+ REQUIRE_THROWS_AS(ebpps_sketch<std::string>::deserialize(bytes.data(), bytes.size()-1), std::out_of_range);
168
+ sk_bytes = ebpps_sketch<std::string>::deserialize(bytes.data(), bytes.size());
169
+ check_if_equal(sk, sk_bytes);
170
+
171
+ ss.str("");
172
+ sk.serialize(ss);
173
+ sk_stream = ebpps_sketch<std::string>::deserialize(ss);
174
+ check_if_equal(sk, sk_stream);
175
+ check_if_equal(sk_bytes, sk_stream); // should be redundant
176
+ }
177
+
178
+ TEST_CASE("ebpps sketch: serialize/deserialize ints", "[ebpps_sketch]") {
179
+ uint32_t k = 10;
180
+ ebpps_sketch<uint32_t> sk(k);
181
+
182
+ for (uint32_t i = 0; i < k; ++i)
183
+ sk.update(i);
184
+ sk.update(2 * k, 3.5);
185
+ REQUIRE(sk.get_cumulative_weight() == Approx(k + 3.5).margin(EPS));
186
+
187
+ auto bytes = sk.serialize();
188
+ REQUIRE(bytes.size() == sk.get_serialized_size_bytes());
189
+ REQUIRE_THROWS_AS(ebpps_sketch<uint32_t>::deserialize(bytes.data(), bytes.size()-1), std::out_of_range);
190
+ auto sk_bytes = ebpps_sketch<uint32_t>::deserialize(bytes.data(), bytes.size());
191
+ check_if_equal(sk, sk_bytes);
192
+
193
+ std::stringstream ss(std::ios::in | std::ios::out | std::ios::binary);
194
+ sk.serialize(ss);
195
+ auto sk_stream = ebpps_sketch<uint32_t>::deserialize(ss);
196
+ check_if_equal(sk, sk_stream);
197
+ check_if_equal(sk_bytes, sk_stream); // should be redundant
198
+
199
+ sk.reset();
200
+ REQUIRE(sk.get_k() == k);
201
+ REQUIRE(sk.get_n() == 0);
202
+ REQUIRE(sk.get_c() == 0.0);
203
+ REQUIRE(sk.get_cumulative_weight() == 0.0);
204
+ REQUIRE(sk.is_empty());
205
+ }
206
+
207
+ TEST_CASE("ebpps sketch: merge large into small", "[ebpps_sketch]") {
208
+ uint32_t k = 100;
209
+
210
+ // lvalue merge
211
+ ebpps_sketch<int> sk1(k / 2);
212
+ sk1.update(-1, k / 10.0); // one heavy item, but less than sk2 weight
213
+ ebpps_sketch<int> sk2 = create_unweighted_sketch(k, k);
214
+
215
+ sk1.merge(sk2);
216
+ REQUIRE(sk1.get_k() == k / 2);
217
+ REQUIRE(sk1.get_n() == k + 1);
218
+ REQUIRE(sk1.get_c() < k);
219
+ REQUIRE(sk1.get_cumulative_weight() == Approx(1.1 * k).margin(EPS));
220
+
221
+ // rvalue merge
222
+ sk1 = create_unweighted_sketch(k / 2, 0);
223
+ sk1.update(-1, k / 4.0);
224
+ sk1.update(-2, k / 8.0);
225
+ // sk2 should have been unchanged
226
+ REQUIRE(sk2.get_n() == k);
227
+ REQUIRE(sk2.get_c() == Approx(k).margin(EPS));
228
+
229
+ sk1.merge(std::move(sk2));
230
+ REQUIRE(sk1.get_k() == k / 2);
231
+ REQUIRE(sk1.get_n() == k + 2);
232
+ REQUIRE(sk1.get_c() < k);
233
+ // cumulative weight is now (1 + 0.25 + 0.125) k = 1.375 k
234
+ REQUIRE(sk1.get_cumulative_weight() == Approx(1.375 * k).margin(EPS));
235
+ }
236
+
237
+ TEST_CASE("ebpps sketch: merge small into large", "[ebpps_sketch]") {
238
+ uint32_t k = 100;
239
+
240
+ // lvalue merge
241
+ ebpps_sketch<int> sk1 = create_unweighted_sketch(k, k);
242
+ ebpps_sketch<int> sk2(k / 2);
243
+ sk2.update(-1, k / 10.0); // one heavy item, but less than sk1 weight
244
+
245
+ sk1.merge(sk2);
246
+ REQUIRE(sk1.get_k() == k / 2);
247
+ REQUIRE(sk1.get_n() == k + 1);
248
+ REQUIRE(sk1.get_c() < k);
249
+ REQUIRE(sk1.get_cumulative_weight() == Approx(1.1 * k).margin(EPS));
250
+
251
+ // rvalue merge
252
+ sk1 = create_unweighted_sketch(k, 3 * k / 2);
253
+ // sk2 should have been unchanged
254
+ REQUIRE(sk2.get_n() == 1);
255
+ REQUIRE(sk2.get_c() == 1.0);
256
+ sk2.update(-2, k / 10.0);
257
+
258
+ sk1.merge(std::move(sk2));
259
+ REQUIRE(sk1.get_k() == k / 2);
260
+ REQUIRE(sk1.get_n() == (3 * k / 2) + 2);
261
+ REQUIRE(sk1.get_c() < k);
262
+ // cumulative weight is now (1.5 + 0.2) k
263
+ REQUIRE(sk1.get_cumulative_weight() == Approx(1.7 * k).margin(EPS));
264
+ }
265
+
266
+ }
@@ -0,0 +1,81 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <var_opt_sketch.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ // assume the binary sketches for this test have been generated by datasketches-java code
27
+ // in the subdirectory called "java" in the root directory of this project
28
+ static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
29
+
30
+ TEST_CASE("var opt sketch long", "[serde_compat]") {
31
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
32
+ for (const unsigned n: n_arr) {
33
+ std::ifstream is;
34
+ is.exceptions(std::ios::failbit | std::ios::badbit);
35
+ is.open(testBinaryInputPath + "varopt_sketch_long_n" + std::to_string(n) + "_java.sk", std::ios::binary);
36
+ const auto sketch = var_opt_sketch<long>::deserialize(is);
37
+ REQUIRE(sketch.is_empty() == (n == 0));
38
+ REQUIRE(sketch.get_num_samples() == (n > 10 ? 32 : n));
39
+ }
40
+ }
41
+
42
+ TEST_CASE("var opt sketch: deserialize exact from java", "[serde_compat]") {
43
+ const double EPS = 1e-13;
44
+ std::ifstream is;
45
+ is.exceptions(std::ios::failbit | std::ios::badbit);
46
+ is.open(testBinaryInputPath + "varopt_sketch_string_exact_java.sk", std::ios::binary);
47
+ const auto sketch = var_opt_sketch<std::string>::deserialize(is);
48
+ REQUIRE_FALSE(sketch.is_empty());
49
+ REQUIRE(sketch.get_k() == 1024);
50
+ REQUIRE(sketch.get_n() == 200);
51
+ REQUIRE(sketch.get_num_samples() == 200);
52
+ const subset_summary ss = sketch.estimate_subset_sum([](std::string){ return true; });
53
+
54
+ double tgt_wt = 0.0;
55
+ for (int i = 1; i <= 200; ++i) { tgt_wt += 1000.0 / i; }
56
+ REQUIRE(ss.total_sketch_weight == Approx(tgt_wt).margin(EPS));
57
+ }
58
+
59
+
60
+ TEST_CASE("var opt sketch: deserialize sampling from java", "[serde_compat]") {
61
+ const double EPS = 1e-13;
62
+ std::ifstream is;
63
+ is.exceptions(std::ios::failbit | std::ios::badbit);
64
+ is.open(testBinaryInputPath + "varopt_sketch_long_sampling_java.sk", std::ios::binary);
65
+ const auto sketch = var_opt_sketch<int64_t>::deserialize(is);
66
+ REQUIRE_FALSE(sketch.is_empty());
67
+ REQUIRE(sketch.get_k() == 1024);
68
+ REQUIRE(sketch.get_n() == 2003);
69
+ REQUIRE(sketch.get_num_samples() == sketch.get_k());
70
+ subset_summary ss = sketch.estimate_subset_sum([](int64_t){ return true; });
71
+ REQUIRE(ss.estimate == Approx(332000.0).margin(EPS));
72
+ REQUIRE(ss.total_sketch_weight == Approx(332000.0).margin(EPS));
73
+
74
+ ss = sketch.estimate_subset_sum([](int64_t x){ return x < 0; });
75
+ REQUIRE(ss.estimate == 330000.0); // heavy item, weight is exact
76
+
77
+ ss = sketch.estimate_subset_sum([](int64_t x){ return x >= 0; });
78
+ REQUIRE(ss.estimate == Approx(2000.0).margin(EPS));
79
+ }
80
+
81
+ } /* namespace datasketches */
@@ -0,0 +1,54 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <var_opt_sketch.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ TEST_CASE("varopt sketch long generate", "[serialize_for_java]") {
27
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
28
+ for (const unsigned n: n_arr) {
29
+ var_opt_sketch<long> sketch(32);
30
+ for (unsigned i = 1; i <= n; ++i) sketch.update(i);
31
+ std::ofstream os("varopt_sketch_long_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
32
+ sketch.serialize(os);
33
+ }
34
+ }
35
+
36
+ TEST_CASE("varopt sketch string exact", "[serialize_for_java]") {
37
+ var_opt_sketch<std::string> sketch(1024);
38
+ for (unsigned i = 1; i <= 200; ++i) sketch.update(std::to_string(i), 1000.0 / i);
39
+ std::ofstream os("varopt_sketch_string_exact_cpp.sk", std::ios::binary);
40
+ sketch.serialize(os);
41
+ }
42
+
43
+ TEST_CASE("varopt sketch long sampling", "[serialize_for_java]") {
44
+ var_opt_sketch<long> sketch(1024);
45
+ for (unsigned i = 0; i < 2000; ++i) sketch.update(i);
46
+ // negative heavy items to allow a simple predicate to filter
47
+ sketch.update(-1L, 100000.0);
48
+ sketch.update(-2L, 110000.0);
49
+ sketch.update(-3L, 120000.0);
50
+ std::ofstream os("varopt_sketch_long_sampling_cpp.sk", std::ios::binary);
51
+ sketch.serialize(os);
52
+ }
53
+
54
+ } /* namespace datasketches */
@@ -489,41 +489,4 @@ TEST_CASE("varopt sketch: estimate subset sum", "[var_opt_sketch]") {
489
489
  REQUIRE(summary.estimate < total_weight); // exact mode, so know it must be strictly less
490
490
  }
491
491
 
492
- TEST_CASE("varopt sketch: deserialize exact from java", "[var_opt_sketch]") {
493
- std::ifstream is;
494
- is.exceptions(std::ios::failbit | std::ios::badbit);
495
- is.open(testBinaryInputPath + "varopt_sketch_string_exact.sk", std::ios::binary);
496
- var_opt_sketch<std::string> sketch = var_opt_sketch<std::string>::deserialize(is);
497
- REQUIRE_FALSE(sketch.is_empty());
498
- REQUIRE(sketch.get_k() == 1024);
499
- REQUIRE(sketch.get_n() == 200);
500
- REQUIRE(sketch.get_num_samples() == 200);
501
- subset_summary ss = sketch.estimate_subset_sum([](std::string){ return true; });
502
-
503
- double tgt_wt = 0.0;
504
- for (int i = 1; i <= 200; ++i) { tgt_wt += 1000.0 / i; }
505
- REQUIRE(ss.total_sketch_weight == Approx(tgt_wt).margin(EPS));
506
- }
507
-
508
-
509
- TEST_CASE("varopt sketch: deserialize sampling from java", "[var_opt_sketch]") {
510
- std::ifstream is;
511
- is.exceptions(std::ios::failbit | std::ios::badbit);
512
- is.open(testBinaryInputPath + "varopt_sketch_long_sampling.sk", std::ios::binary);
513
- var_opt_sketch<int64_t> sketch = var_opt_sketch<int64_t>::deserialize(is);
514
- REQUIRE_FALSE(sketch.is_empty());
515
- REQUIRE(sketch.get_k() == 1024);
516
- REQUIRE(sketch.get_n() == 2003);
517
- REQUIRE(sketch.get_num_samples() == sketch.get_k());
518
- subset_summary ss = sketch.estimate_subset_sum([](int64_t){ return true; });
519
- REQUIRE(ss.estimate == Approx(332000.0).margin(EPS));
520
- REQUIRE(ss.total_sketch_weight == Approx(332000.0).margin(EPS));
521
-
522
- ss = sketch.estimate_subset_sum([](int64_t x){ return x < 0; });
523
- REQUIRE(ss.estimate == 330000.0); // heavy item, weight is exact
524
-
525
- ss = sketch.estimate_subset_sum([](int64_t x){ return x >= 0; });
526
- REQUIRE(ss.estimate == Approx(2000.0).margin(EPS));
527
- }
528
-
529
492
  }
@@ -0,0 +1,50 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <var_opt_union.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ // assume the binary sketches for this test have been generated by datasketches-java code
27
+ // in the subdirectory called "java" in the root directory of this project
28
+ static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
29
+
30
+ TEST_CASE("var opt union double", "[serde_compat]") {
31
+ const double EPS = 1e-13;
32
+
33
+ std::ifstream is;
34
+ is.exceptions(std::ios::failbit | std::ios::badbit);
35
+ is.open(testBinaryInputPath + "varopt_union_double_sampling_java.sk", std::ios::binary);
36
+ auto u = var_opt_union<double>::deserialize(is);
37
+
38
+ // must reduce k in the process
39
+ const auto result = u.get_result();
40
+ REQUIRE_FALSE(result.is_empty());
41
+ REQUIRE(result.get_n() == 97);
42
+
43
+ const double expected_wt = 96.0; // light items -- ignoring the heavy one
44
+ const subset_summary ss = result.estimate_subset_sum([](double x){return x >= 0;});
45
+ REQUIRE(ss.estimate == Approx(expected_wt).margin(EPS));
46
+ REQUIRE(ss.total_sketch_weight == Approx(expected_wt + 1024.0).margin(EPS));
47
+ REQUIRE(result.get_k() < 128);
48
+ }
49
+
50
+ } /* namespace datasketches */
@@ -0,0 +1,56 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <var_opt_union.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ TEST_CASE("var opt union double sampling", "[serialize_for_java]") {
27
+ const unsigned k_small = 16;
28
+ const unsigned k_max = 128;
29
+ const unsigned n1 = 32;
30
+ const unsigned n2 = 64;
31
+
32
+ // small k sketch, but sampling
33
+ var_opt_sketch<double> sketch1(k_small);
34
+ for (unsigned i = 0; i < n1; ++i) sketch1.update(i);
35
+ // negative heavy item to allow a simple predicate to filter
36
+ sketch1.update(-1, n1 * n1);
37
+
38
+ // another one, but different n to get a different per-item weight
39
+ var_opt_sketch<double> sketch2(k_small);
40
+ for (unsigned i = 0; i < n2; ++i) sketch2.update(i);
41
+
42
+ var_opt_union<double> u(k_max);
43
+ u.update(sketch1);
44
+ u.update(sketch2);
45
+
46
+ // must reduce k in the process
47
+ auto result = u.get_result();
48
+ REQUIRE(result.get_k() < k_max);
49
+ REQUIRE(result.get_k() >= k_small);
50
+ REQUIRE(result.get_n() == 97);
51
+
52
+ std::ofstream os("varopt_union_double_sampling_cpp.sk", std::ios::binary);
53
+ u.serialize(os);
54
+ }
55
+
56
+ } /* namespace datasketches */
@@ -305,22 +305,4 @@ TEST_CASE("varopt union: serialize sampling", "[var_opt_union]") {
305
305
  compare_serialization_deserialization(u);
306
306
  }
307
307
 
308
- TEST_CASE("varopt union: deserialize from java", "[var_opt_union]") {
309
- std::ifstream is;
310
- is.exceptions(std::ios::failbit | std::ios::badbit);
311
- is.open(testBinaryInputPath + "varopt_union_double_sampling.sk", std::ios::binary);
312
- var_opt_union<double> u = var_opt_union<double>::deserialize(is);
313
-
314
- // must reduce k in the process, like in small_sampling_sketch()
315
- var_opt_sketch<double> result = u.get_result();
316
- REQUIRE_FALSE(result.is_empty());
317
- REQUIRE(result.get_n() == 97);
318
-
319
- double expected_wt = 96.0;// light items -- ignoring the heavy one
320
- subset_summary ss = result.estimate_subset_sum([](double x){return x >= 0;});
321
- REQUIRE(ss.estimate == Approx(expected_wt).margin(EPS));
322
- REQUIRE(ss.total_sketch_weight == Approx(expected_wt + 1024.0).margin(EPS));
323
- REQUIRE(result.get_k() < 128);
324
- }
325
-
326
308
  }