datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -0,0 +1,83 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <frequent_items_sketch.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ TEST_CASE("frequent longs sketch generate", "[serialize_for_java]") {
27
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
28
+ for (const unsigned n: n_arr) {
29
+ frequent_items_sketch<long> sketch(6);
30
+ for (unsigned i = 1; i <= n; ++i) sketch.update(i);
31
+ REQUIRE(sketch.is_empty() == (n == 0));
32
+ if (n > 10) {
33
+ REQUIRE(sketch.get_maximum_error() > 0);
34
+ } else {
35
+ REQUIRE(sketch.get_maximum_error() == 0);
36
+ }
37
+ REQUIRE(sketch.get_total_weight() == n);
38
+ std::ofstream os("frequent_long_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
39
+ sketch.serialize(os);
40
+ }
41
+ }
42
+
43
+ TEST_CASE("frequent strings sketch generate", "[serialize_for_java]") {
44
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
45
+ for (const unsigned n: n_arr) {
46
+ frequent_items_sketch<std::string> sketch(6);
47
+ for (unsigned i = 1; i <= n; ++i) sketch.update(std::to_string(i));
48
+ REQUIRE(sketch.is_empty() == (n == 0));
49
+ if (n > 10) {
50
+ REQUIRE(sketch.get_maximum_error() > 0);
51
+ } else {
52
+ REQUIRE(sketch.get_maximum_error() == 0);
53
+ }
54
+ REQUIRE(sketch.get_total_weight() == n);
55
+ std::ofstream os("frequent_string_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
56
+ sketch.serialize(os);
57
+ }
58
+ }
59
+
60
+ TEST_CASE("frequent strings sketch ascii", "[serialize_for_java]") {
61
+ frequent_items_sketch<std::string> sketch(6);
62
+ sketch.update("aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1);
63
+ sketch.update("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb", 2);
64
+ sketch.update("ccccccccccccccccccccccccccccc", 3);
65
+ sketch.update("ddddddddddddddddddddddddddddd", 4);
66
+ std::ofstream os("frequent_string_ascii_cpp.sk", std::ios::binary);
67
+ sketch.serialize(os);
68
+ }
69
+
70
+ TEST_CASE("frequent strings sketch utf8", "[serialize_for_java]") {
71
+ frequent_items_sketch<std::string> sketch(6);
72
+ sketch.update("абвгд", 1);
73
+ sketch.update("еёжзи", 2);
74
+ sketch.update("йклмн", 3);
75
+ sketch.update("опрст", 4);
76
+ sketch.update("уфхцч", 5);
77
+ sketch.update("шщъыь", 6);
78
+ sketch.update("эюя", 7);
79
+ std::ofstream os("frequent_string_utf8_cpp.sk", std::ios::binary);
80
+ sketch.serialize(os);
81
+ }
82
+
83
+ } /* namespace datasketches */
@@ -70,6 +70,7 @@ TEST_CASE("frequent items: several items, no resize, no purge", "[frequent_items
70
70
  REQUIRE(sketch.get_estimate("b") == 3);
71
71
  REQUIRE(sketch.get_estimate("c") == 2);
72
72
  REQUIRE(sketch.get_estimate("d") == 1);
73
+ REQUIRE(sketch.get_maximum_error() == 0);
73
74
  }
74
75
 
75
76
  TEST_CASE("frequent items: several items, with resize, no purge", "[frequent_items_sketch]") {
@@ -96,6 +97,7 @@ TEST_CASE("frequent items: several items, with resize, no purge", "[frequent_ite
96
97
  REQUIRE(sketch.get_estimate("b") == 3);
97
98
  REQUIRE(sketch.get_estimate("c") == 2);
98
99
  REQUIRE(sketch.get_estimate("d") == 1);
100
+ REQUIRE(sketch.get_maximum_error() == 0);
99
101
  }
100
102
 
101
103
  TEST_CASE("frequent items: estimation mode", "[frequent_items_sketch]") {
@@ -149,6 +151,7 @@ TEST_CASE("frequent items: merge exact mode", "[frequent_items_sketch]") {
149
151
  REQUIRE(sketch1.get_estimate(2) == 3);
150
152
  REQUIRE(sketch1.get_estimate(3) == 2);
151
153
  REQUIRE(sketch1.get_estimate(4) == 1);
154
+ REQUIRE(sketch1.get_maximum_error() == 0);
152
155
  }
153
156
 
154
157
  TEST_CASE("frequent items: merge estimation mode", "[frequent_items_sketch]") {
@@ -199,48 +202,6 @@ TEST_CASE("frequent items: merge estimation mode", "[frequent_items_sketch]") {
199
202
  REQUIRE(9 <= items[1].get_estimate()); // always overestimated
200
203
  }
201
204
 
202
- TEST_CASE("frequent items: deserialize from java long", "[frequent_items_sketch]") {
203
- std::ifstream is;
204
- is.exceptions(std::ios::failbit | std::ios::badbit);
205
- is.open(testBinaryInputPath + "longs_sketch_from_java.sk", std::ios::binary);
206
- auto sketch = frequent_items_sketch<long long>::deserialize(is);
207
- REQUIRE_FALSE(sketch.is_empty());
208
- REQUIRE(sketch.get_total_weight() == 4);
209
- REQUIRE(sketch.get_num_active_items() == 4);
210
- REQUIRE(sketch.get_estimate(1) == 1);
211
- REQUIRE(sketch.get_estimate(2) == 1);
212
- REQUIRE(sketch.get_estimate(3) == 1);
213
- REQUIRE(sketch.get_estimate(4) == 1);
214
- }
215
-
216
- TEST_CASE("frequent items: deserialize from java string", "[frequent_items_sketch]") {
217
- std::ifstream is;
218
- is.exceptions(std::ios::failbit | std::ios::badbit);
219
- is.open(testBinaryInputPath + "items_sketch_string_from_java.sk", std::ios::binary);
220
- auto sketch = frequent_items_sketch<std::string>::deserialize(is);
221
- REQUIRE_FALSE(sketch.is_empty());
222
- REQUIRE(sketch.get_total_weight() == 4);
223
- REQUIRE(sketch.get_num_active_items() == 4);
224
- REQUIRE(sketch.get_estimate("aaaaaaaaaaaaaaaaaaaaaaaaaaaaa") == 1);
225
- REQUIRE(sketch.get_estimate("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb") == 1);
226
- REQUIRE(sketch.get_estimate("ccccccccccccccccccccccccccccc") == 1);
227
- REQUIRE(sketch.get_estimate("ddddddddddddddddddddddddddddd") == 1);
228
- }
229
-
230
- TEST_CASE("frequent items: deserialize from java string, utf-8", "[frequent_items_sketch]") {
231
- std::ifstream is;
232
- is.exceptions(std::ios::failbit | std::ios::badbit);
233
- is.open(testBinaryInputPath + "items_sketch_string_utf8_from_java.sk", std::ios::binary);
234
- auto sketch = frequent_items_sketch<std::string>::deserialize(is);
235
- REQUIRE_FALSE(sketch.is_empty());
236
- REQUIRE(sketch.get_total_weight() == 10);
237
- REQUIRE(sketch.get_num_active_items() == 4);
238
- REQUIRE(sketch.get_estimate("абвгд") == 1);
239
- REQUIRE(sketch.get_estimate("еёжзи") == 2);
240
- REQUIRE(sketch.get_estimate("йклмн") == 3);
241
- REQUIRE(sketch.get_estimate("опрст") == 4);
242
- }
243
-
244
205
  TEST_CASE("frequent items: deserialize long64 stream", "[frequent_items_sketch]") {
245
206
  frequent_items_sketch<long long> sketch1(3);
246
207
  sketch1.update(1, 1);
@@ -169,9 +169,9 @@ CouponList<A>* CouponList<A>::newList(std::istream& is, const A& allocator) {
169
169
  }
170
170
 
171
171
  template<typename A>
172
- vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const {
172
+ auto CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const -> vector_bytes {
173
173
  const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
174
- vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
174
+ vector_bytes byteArr(sketchSizeBytes, 0, getAllocator());
175
175
  uint8_t* bytes = byteArr.data() + header_size_bytes;
176
176
 
177
177
  bytes[hll_constants::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
@@ -33,12 +33,14 @@ class HllSketchImplFactory;
33
33
  template<typename A>
34
34
  class CouponList : public HllSketchImpl<A> {
35
35
  public:
36
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
37
+
36
38
  CouponList(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
37
39
  CouponList(const CouponList& that, target_hll_type tgtHllType);
38
40
 
39
41
  static CouponList* newList(const void* bytes, size_t len, const A& allocator);
40
42
  static CouponList* newList(std::istream& is, const A& allocator);
41
- virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
43
+ virtual vector_bytes serialize(bool compact, unsigned header_size_bytes) const;
42
44
  virtual void serialize(std::ostream& os, bool compact) const;
43
45
 
44
46
  virtual ~CouponList() = default;
@@ -216,9 +216,9 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is, const A& allocator) {
216
216
  }
217
217
 
218
218
  template<typename A>
219
- vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) const {
219
+ auto HllArray<A>::serialize(bool compact, unsigned header_size_bytes) const -> vector_bytes {
220
220
  const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
221
- vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
221
+ vector_bytes byteArr(sketchSizeBytes, 0, getAllocator());
222
222
  uint8_t* bytes = byteArr.data() + header_size_bytes;
223
223
  AuxHashMap<A>* auxHashMap = getAuxHashMap();
224
224
 
@@ -537,7 +537,7 @@ AuxHashMap<A>* HllArray<A>::getAuxHashMap() const {
537
537
  }
538
538
 
539
539
  template<typename A>
540
- const vector_u8<A>& HllArray<A>::getHllArray() const {
540
+ auto HllArray<A>::getHllArray() const -> const vector_bytes& {
541
541
  return hllByteArr_;
542
542
  }
543
543
 
@@ -31,13 +31,15 @@ class AuxHashMap;
31
31
  template<typename A>
32
32
  class HllArray : public HllSketchImpl<A> {
33
33
  public:
34
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
35
+
34
36
  HllArray(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
35
37
  explicit HllArray(const HllArray& other, target_hll_type tgtHllType);
36
38
 
37
39
  static HllArray* newHll(const void* bytes, size_t len, const A& allocator);
38
40
  static HllArray* newHll(std::istream& is, const A& allocator);
39
41
 
40
- virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
42
+ virtual vector_bytes serialize(bool compact, unsigned header_size_bytes) const;
41
43
  virtual void serialize(std::ostream& os, bool compact) const;
42
44
 
43
45
  virtual ~HllArray() = default;
@@ -97,7 +99,7 @@ class HllArray : public HllSketchImpl<A> {
97
99
 
98
100
  virtual A getAllocator() const;
99
101
 
100
- const vector_u8<A>& getHllArray() const;
102
+ const vector_bytes& getHllArray() const;
101
103
 
102
104
  protected:
103
105
  void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue);
@@ -107,7 +109,7 @@ class HllArray : public HllSketchImpl<A> {
107
109
  double hipAccum_;
108
110
  double kxq0_;
109
111
  double kxq1_;
110
- vector_u8<A> hllByteArr_; //init by sub-classes
112
+ vector_bytes hllByteArr_; //init by sub-classes
111
113
  uint8_t curMin_; //always zero for Hll6 and Hll8, only tracked by Hll4Array
112
114
  uint32_t numAtCurMin_; //interpreted as num zeros when curMin == 0
113
115
  bool oooFlag_; //Out-Of-Order Flag
@@ -94,14 +94,14 @@ hll_sketch_alloc<A>::hll_sketch_alloc(HllSketchImpl<A>* that) :
94
94
  {}
95
95
 
96
96
  template<typename A>
97
- hll_sketch_alloc<A> hll_sketch_alloc<A>::operator=(const hll_sketch_alloc<A>& other) {
97
+ hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(const hll_sketch_alloc<A>& other) {
98
98
  sketch_impl->get_deleter()(sketch_impl);
99
99
  sketch_impl = other.sketch_impl->copy();
100
100
  return *this;
101
101
  }
102
102
 
103
103
  template<typename A>
104
- hll_sketch_alloc<A> hll_sketch_alloc<A>::operator=(hll_sketch_alloc<A>&& other) {
104
+ hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(hll_sketch_alloc<A>&& other) {
105
105
  std::swap(sketch_impl, other.sketch_impl);
106
106
  return *this;
107
107
  }
@@ -232,12 +232,12 @@ void hll_sketch_alloc<A>::serialize_updatable(std::ostream& os) const {
232
232
  }
233
233
 
234
234
  template<typename A>
235
- vector_u8<A> hll_sketch_alloc<A>::serialize_compact(unsigned header_size_bytes) const {
235
+ auto hll_sketch_alloc<A>::serialize_compact(unsigned header_size_bytes) const -> vector_bytes {
236
236
  return sketch_impl->serialize(true, header_size_bytes);
237
237
  }
238
238
 
239
239
  template<typename A>
240
- vector_u8<A> hll_sketch_alloc<A>::serialize_updatable() const {
240
+ auto hll_sketch_alloc<A>::serialize_updatable() const -> vector_bytes {
241
241
  return sketch_impl->serialize(false, 0);
242
242
  }
243
243
 
@@ -30,11 +30,13 @@ namespace datasketches {
30
30
  template<typename A>
31
31
  class HllSketchImpl {
32
32
  public:
33
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
34
+
33
35
  HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
34
36
  virtual ~HllSketchImpl();
35
37
 
36
38
  virtual void serialize(std::ostream& os, bool compact) const = 0;
37
- virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const = 0;
39
+ virtual vector_bytes serialize(bool compact, unsigned header_size_bytes) const = 0;
38
40
 
39
41
  virtual HllSketchImpl* copy() const = 0;
40
42
  virtual HllSketchImpl* copyAs(target_hll_type tgtHllType) const = 0;
@@ -124,8 +124,6 @@ public:
124
124
  static uint32_t pair(uint32_t slotNo, uint8_t value);
125
125
  static uint32_t getLow26(uint32_t coupon);
126
126
  static uint8_t getValue(uint32_t coupon);
127
- static double invPow2(uint8_t e);
128
- static uint8_t ceilingPowerOf2(uint32_t n);
129
127
  static uint8_t simpleIntLog2(uint32_t n); // n must be power of 2
130
128
  static uint8_t computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK);
131
129
  static double getRelErr(bool upperBound, bool unioned, uint8_t lgConfigK, uint8_t numStdDev);
@@ -204,16 +202,6 @@ inline uint8_t HllUtil<A>::getValue(uint32_t coupon) {
204
202
  return coupon >> hll_constants::KEY_BITS_26;
205
203
  }
206
204
 
207
- template<typename A>
208
- inline double HllUtil<A>::invPow2(uint8_t e) {
209
- union {
210
- long long longVal;
211
- double doubleVal;
212
- } conv;
213
- conv.longVal = (1023L - e) << 52;
214
- return conv.doubleVal;
215
- }
216
-
217
205
  template<typename A>
218
206
  inline uint8_t HllUtil<A>::simpleIntLog2(uint32_t n) {
219
207
  if (n == 0) {
@@ -30,40 +30,15 @@
30
30
 
31
31
  namespace datasketches {
32
32
 
33
- /**
34
- * This is a high performance implementation of Phillipe Flajolet&#8217;s HLL sketch but with
35
- * significantly improved error behavior. If the ONLY use case for sketching is counting
36
- * uniques and merging, the HLL sketch is a reasonable choice, although the highest performing in terms of accuracy for
37
- * storage space consumed is CPC (Compressed Probabilistic Counting). For large enough counts, this HLL version (with HLL_4) can be 2 to
38
- * 16 times smaller than the Theta sketch family for the same accuracy.
39
- *
40
- * <p>This implementation offers three different types of HLL sketch, each with different
41
- * trade-offs with accuracy, space and performance. These types are specified with the
42
- * {@link TgtHllType} parameter.
43
- *
44
- * <p>In terms of accuracy, all three types, for the same <i>lg_config_k</i>, have the same error
45
- * distribution as a function of <i>n</i>, the number of unique values fed to the sketch.
46
- * The configuration parameter <i>lg_config_k</i> is the log-base-2 of <i>K</i>,
47
- * where <i>K</i> is the number of buckets or slots for the sketch.
48
- *
49
- * <p>During warmup, when the sketch has only received a small number of unique items
50
- * (up to about 10% of <i>K</i>), this implementation leverages a new class of estimator
51
- * algorithms with significantly better accuracy.
52
- *
53
- * <p>This sketch also offers the capability of operating off-heap. Given a WritableMemory object
54
- * created by the user, the sketch will perform all of its updates and internal phase transitions
55
- * in that object, which can actually reside either on-heap or off-heap based on how it is
56
- * configured. In large systems that must update and merge many millions of sketches, having the
57
- * sketch operate off-heap avoids the serialization and deserialization costs of moving sketches
58
- * to and from off-heap memory-mapped files, for example, and eliminates big garbage collection
59
- * delays.
60
- *
61
- * author Jon Malkin
62
- * author Lee Rhodes
63
- * author Kevin Lang
64
- */
33
+ // forward declarations
34
+ template<typename A> class hll_sketch_alloc;
35
+ template<typename A> class hll_union_alloc;
36
+
37
+ /// HLL sketch alias with default allocator
38
+ using hll_sketch = hll_sketch_alloc<std::allocator<uint8_t>>;
39
+ /// HLL union alias with default allocator
40
+ using hll_union = hll_union_alloc<std::allocator<uint8_t>>;
65
41
 
66
-
67
42
  /**
68
43
  * Specifies the target type of HLL sketch to be created. It is a target in that the actual
69
44
  * allocation of the HLL array is deferred until sufficient number of items have been received by
@@ -100,14 +75,41 @@ enum target_hll_type {
100
75
  HLL_8 ///< 8 bits per entry (fastest, fixed size)
101
76
  };
102
77
 
103
- template<typename A>
104
- class HllSketchImpl;
105
-
106
- template<typename A>
107
- class hll_union_alloc;
78
+ /**
79
+ * This is a high performance implementation of Phillipe Flajolet's HLL sketch but with
80
+ * significantly improved error behavior. If the ONLY use case for sketching is counting
81
+ * uniques and merging, the HLL sketch is a reasonable choice, although the highest performing in terms of accuracy for
82
+ * storage space consumed is CPC (Compressed Probabilistic Counting). For large enough counts, this HLL version (with HLL_4) can be 2 to
83
+ * 16 times smaller than the Theta sketch family for the same accuracy.
84
+ *
85
+ * <p>This implementation offers three different types of HLL sketch, each with different
86
+ * trade-offs with accuracy, space and performance. These types are specified with the
87
+ * {@link target_hll_type} parameter.
88
+ *
89
+ * <p>In terms of accuracy, all three types, for the same <i>lg_config_k</i>, have the same error
90
+ * distribution as a function of <i>n</i>, the number of unique values fed to the sketch.
91
+ * The configuration parameter <i>lg_config_k</i> is the log-base-2 of <i>K</i>,
92
+ * where <i>K</i> is the number of buckets or slots for the sketch.
93
+ *
94
+ * <p>During warmup, when the sketch has only received a small number of unique items
95
+ * (up to about 10% of <i>K</i>), this implementation leverages a new class of estimator
96
+ * algorithms with significantly better accuracy.
97
+ *
98
+ * <p>This sketch also offers the capability of operating off-heap. Given a WritableMemory object
99
+ * created by the user, the sketch will perform all of its updates and internal phase transitions
100
+ * in that object, which can actually reside either on-heap or off-heap based on how it is
101
+ * configured. In large systems that must update and merge many millions of sketches, having the
102
+ * sketch operate off-heap avoids the serialization and deserialization costs of moving sketches
103
+ * to and from off-heap memory-mapped files, for example, and eliminates big garbage collection
104
+ * delays.
105
+ *
106
+ * author Jon Malkin
107
+ * author Lee Rhodes
108
+ * author Kevin Lang
109
+ */
108
110
 
109
- template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
110
- template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
111
+ // forward declaration
112
+ template<typename A> class HllSketchImpl;
111
113
 
112
114
  template<typename A = std::allocator<uint8_t> >
113
115
  class hll_sketch_alloc final {
@@ -119,27 +121,33 @@ class hll_sketch_alloc final {
119
121
  * @param start_full_size Indicates whether to start in HLL mode,
120
122
  * keeping memory use constant (if HLL_6 or HLL_8) at the cost of
121
123
  * starting out using much more memory
124
+ * @param allocator instance of an Allocator
122
125
  */
123
126
  explicit hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
124
127
 
125
128
  /**
126
129
  * Copy constructor
130
+ * @param that sketch to be copied
127
131
  */
128
132
  hll_sketch_alloc(const hll_sketch_alloc<A>& that);
129
133
 
130
134
  /**
131
135
  * Copy constructor to a new target type
136
+ * @param that sketch to be copied
137
+ * @param tgt_type target_hll_type
132
138
  */
133
139
  hll_sketch_alloc(const hll_sketch_alloc<A>& that, target_hll_type tgt_type);
134
140
 
135
141
  /**
136
142
  * Move constructor
143
+ * @param that sketch to be moved
137
144
  */
138
145
  hll_sketch_alloc(hll_sketch_alloc<A>&& that) noexcept;
139
146
 
140
147
  /**
141
148
  * Reconstructs a sketch from a serialized image on a stream.
142
149
  * @param is An input stream with a binary image of a sketch
150
+ * @param allocator instance of an Allocator
143
151
  */
144
152
  static hll_sketch_alloc deserialize(std::istream& is, const A& allocator = A());
145
153
 
@@ -147,17 +155,26 @@ class hll_sketch_alloc final {
147
155
  * Reconstructs a sketch from a serialized image in a byte array.
148
156
  * @param bytes An input array with a binary image of a sketch
149
157
  * @param len Length of the input array, in bytes
158
+ * @param allocator instance of an Allocator
150
159
  */
151
160
  static hll_sketch_alloc deserialize(const void* bytes, size_t len, const A& allocator = A());
152
161
 
153
162
  //! Class destructor
154
163
  virtual ~hll_sketch_alloc();
155
164
 
156
- //! Copy assignment operator
157
- hll_sketch_alloc operator=(const hll_sketch_alloc<A>& other);
165
+ /**
166
+ * Copy assignment operator
167
+ * @param other sketch to be copied
168
+ * @return reference to this sketch
169
+ */
170
+ hll_sketch_alloc& operator=(const hll_sketch_alloc<A>& other);
158
171
 
159
- //! Move assignment operator
160
- hll_sketch_alloc operator=(hll_sketch_alloc<A>&& other);
172
+ /**
173
+ * Move assignment operator
174
+ * @param other sketch to be moved
175
+ * @return reference to this sketch
176
+ */
177
+ hll_sketch_alloc& operator=(hll_sketch_alloc<A>&& other);
161
178
 
162
179
  /**
163
180
  * Resets the sketch to an empty state in coupon collection mode.
@@ -165,18 +182,22 @@ class hll_sketch_alloc final {
165
182
  */
166
183
  void reset();
167
184
 
168
- typedef vector_u8<A> vector_bytes; // alias for users
185
+ // This is a convenience alias for users
186
+ // The type returned by the following serialize method
187
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
169
188
 
170
189
  /**
171
190
  * Serializes the sketch to a byte array, compacting data structures
172
191
  * where feasible to eliminate unused storage in the serialized image.
173
192
  * @param header_size_bytes Allows for PostgreSQL integration
193
+ * @return serialized sketch in binary form
174
194
  */
175
195
  vector_bytes serialize_compact(unsigned header_size_bytes = 0) const;
176
196
 
177
197
  /**
178
198
  * Serializes the sketch to a byte array, retaining all internal
179
199
  * data structures in their current form.
200
+ * @return serialized sketch in binary form
180
201
  */
181
202
  vector_bytes serialize_updatable() const;
182
203
 
@@ -392,8 +413,6 @@ class hll_sketch_alloc final {
392
413
  bool is_out_of_order_flag() const;
393
414
  bool is_estimation_mode() const;
394
415
 
395
- typedef typename std::allocator_traits<A>::template rebind_alloc<hll_sketch_alloc> AllocHllSketch;
396
-
397
416
  HllSketchImpl<A>* sketch_impl;
398
417
  friend hll_union_alloc<A>;
399
418
  };
@@ -413,8 +432,8 @@ class hll_sketch_alloc final {
413
432
  * <p>Although the API for this union operator parallels many of the methods of the
414
433
  * <i>HllSketch</i>, the behavior of the union operator has some fundamental differences.
415
434
  *
416
- * <p>First, the user cannot specify the #tgt_hll_type as an input parameter.
417
- * Instead, it is specified for the sketch returned with #get_result(tgt_hll_tyope).
435
+ * <p>First, the user cannot specify the #target_hll_type as an input parameter.
436
+ * Instead, it is specified for the sketch returned with #get_result.
418
437
  *
419
438
  * <p>Second, the internal effective value of log-base-2 of <i>k</i> for the union operation can
420
439
  * change dynamically based on the smallest <i>lg_config_k</i> that the union operation has seen.
@@ -423,7 +442,6 @@ class hll_sketch_alloc final {
423
442
  * author Lee Rhodes
424
443
  * author Kevin Lang
425
444
  */
426
-
427
445
  template<typename A = std::allocator<uint8_t> >
428
446
  class hll_union_alloc {
429
447
  public:
@@ -431,6 +449,7 @@ class hll_union_alloc {
431
449
  * Construct an hll_union operator with the given maximum log2 of k.
432
450
  * @param lg_max_k The maximum size, in log2, of k. The value must
433
451
  * be between 7 and 21, inclusive.
452
+ * @param allocator instance of an Allocator
434
453
  */
435
454
  explicit hll_union_alloc(uint8_t lg_max_k, const A& allocator = A());
436
455
 
@@ -495,7 +514,7 @@ class hll_union_alloc {
495
514
 
496
515
  /**
497
516
  * Returns the result of this union operator with the specified
498
- * #tgt_hll_type.
517
+ * #target_hll_type.
499
518
  * @param tgt_type The tgt_hll_type enum value of the desired result (Default: HLL_4)
500
519
  * @return The result of this union with the specified tgt_hll_type
501
520
  */
@@ -629,12 +648,6 @@ class hll_union_alloc {
629
648
  hll_sketch_alloc<A> gadget_;
630
649
  };
631
650
 
632
- /// convenience alias for hll_sketch with default allocator
633
- typedef hll_sketch_alloc<> hll_sketch;
634
-
635
- /// convenience alias for hll_union with default allocator
636
- typedef hll_union_alloc<> hll_union;
637
-
638
651
  } // namespace datasketches
639
652
 
640
653
  #include "hll.private.hpp"
@@ -20,7 +20,6 @@ add_executable(hll_test)
20
20
  target_link_libraries(hll_test hll common_test_lib)
21
21
 
22
22
  set_target_properties(hll_test PROPERTIES
23
- CXX_STANDARD 11
24
23
  CXX_STANDARD_REQUIRED YES
25
24
  )
26
25
 
@@ -49,3 +48,17 @@ target_sources(hll_test
49
48
  ToFromByteArrayTest.cpp
50
49
  IsomorphicTest.cpp
51
50
  )
51
+
52
+ if (SERDE_COMPAT)
53
+ target_sources(hll_test
54
+ PRIVATE
55
+ hll_sketch_deserialize_from_java_test.cpp
56
+ )
57
+ endif()
58
+
59
+ if (GENERATE)
60
+ target_sources(hll_test
61
+ PRIVATE
62
+ hll_sketch_serialize_for_java.cpp
63
+ )
64
+ endif()
@@ -53,74 +53,6 @@ TEST_CASE("hll to/from byte array: double serialize", "[hll_byte_array]") {
53
53
  }
54
54
  }
55
55
 
56
- TEST_CASE("hll to/from byte array: deserialize from java", "[hll_byte_array]") {
57
- std::string inputPath;
58
- #ifdef TEST_BINARY_INPUT_PATH
59
- inputPath = TEST_BINARY_INPUT_PATH;
60
- #else
61
- inputPath = "test/";
62
- #endif
63
-
64
- std::ifstream ifs;
65
- ifs.open(inputPath + "list_from_java.sk", std::ios::binary);
66
- hll_sketch sk = hll_sketch::deserialize(ifs);
67
- REQUIRE(sk.is_empty() == false);
68
- REQUIRE(sk.get_lg_config_k() == 8);
69
- REQUIRE(sk.get_lower_bound(1) == 7.0);
70
- REQUIRE(sk.get_estimate() == Approx(7.0).margin(1e-6));
71
- REQUIRE(sk.get_upper_bound(1) == Approx(7.000350).margin(1e-5));
72
- ifs.close();
73
-
74
- ifs.open(inputPath + "compact_set_from_java.sk", std::ios::binary);
75
- sk = hll_sketch::deserialize(ifs);
76
- REQUIRE(sk.is_empty() == false);
77
- REQUIRE(sk.get_lg_config_k() == 8);
78
- REQUIRE(sk.get_lower_bound(1) == 24.0);
79
- REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5));
80
- REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5));
81
- ifs.close();
82
-
83
- ifs.open(inputPath + "updatable_set_from_java.sk", std::ios::binary);
84
- sk = hll_sketch::deserialize(ifs);
85
- REQUIRE(sk.is_empty() == false);
86
- REQUIRE(sk.get_lg_config_k() == 8);
87
- REQUIRE(sk.get_lower_bound(1) == 24.0);
88
- REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5));
89
- REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5));
90
- ifs.close();
91
-
92
-
93
- ifs.open(inputPath + "array6_from_java.sk", std::ios::binary);
94
- sk = hll_sketch::deserialize(ifs);
95
- REQUIRE(sk.is_empty() == false);
96
- REQUIRE(sk.get_lg_config_k() == 8);
97
- REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
98
- REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
99
- REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
100
- ifs.close();
101
-
102
-
103
- ifs.open(inputPath + "compact_array4_from_java.sk", std::ios::binary);
104
- sk = hll_sketch::deserialize(ifs);
105
- REQUIRE(sk.is_empty() == false);
106
- REQUIRE(sk.get_lg_config_k() == 8);
107
- REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
108
- REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
109
- REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
110
-
111
- ifs.close();
112
-
113
-
114
- ifs.open(inputPath + "updatable_array4_from_java.sk", std::ios::binary);
115
- sk = hll_sketch::deserialize(ifs);
116
- REQUIRE(sk.is_empty() == false);
117
- REQUIRE(sk.get_lg_config_k() == 8);
118
- REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
119
- REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
120
- REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
121
- ifs.close();
122
- }
123
-
124
56
  static void checkSketchEquality(hll_sketch& sk1, hll_sketch& sk2) {
125
57
  REQUIRE(sk1.get_lg_config_k() == sk2.get_lg_config_k());
126
58
  REQUIRE(sk1.get_lower_bound(1) == sk2.get_lower_bound(1));