datasketches 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (237) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -0,0 +1,83 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <frequent_items_sketch.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ TEST_CASE("frequent longs sketch generate", "[serialize_for_java]") {
27
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
28
+ for (const unsigned n: n_arr) {
29
+ frequent_items_sketch<long> sketch(6);
30
+ for (unsigned i = 1; i <= n; ++i) sketch.update(i);
31
+ REQUIRE(sketch.is_empty() == (n == 0));
32
+ if (n > 10) {
33
+ REQUIRE(sketch.get_maximum_error() > 0);
34
+ } else {
35
+ REQUIRE(sketch.get_maximum_error() == 0);
36
+ }
37
+ REQUIRE(sketch.get_total_weight() == n);
38
+ std::ofstream os("frequent_long_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
39
+ sketch.serialize(os);
40
+ }
41
+ }
42
+
43
+ TEST_CASE("frequent strings sketch generate", "[serialize_for_java]") {
44
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
45
+ for (const unsigned n: n_arr) {
46
+ frequent_items_sketch<std::string> sketch(6);
47
+ for (unsigned i = 1; i <= n; ++i) sketch.update(std::to_string(i));
48
+ REQUIRE(sketch.is_empty() == (n == 0));
49
+ if (n > 10) {
50
+ REQUIRE(sketch.get_maximum_error() > 0);
51
+ } else {
52
+ REQUIRE(sketch.get_maximum_error() == 0);
53
+ }
54
+ REQUIRE(sketch.get_total_weight() == n);
55
+ std::ofstream os("frequent_string_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
56
+ sketch.serialize(os);
57
+ }
58
+ }
59
+
60
+ TEST_CASE("frequent strings sketch ascii", "[serialize_for_java]") {
61
+ frequent_items_sketch<std::string> sketch(6);
62
+ sketch.update("aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1);
63
+ sketch.update("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb", 2);
64
+ sketch.update("ccccccccccccccccccccccccccccc", 3);
65
+ sketch.update("ddddddddddddddddddddddddddddd", 4);
66
+ std::ofstream os("frequent_string_ascii_cpp.sk", std::ios::binary);
67
+ sketch.serialize(os);
68
+ }
69
+
70
+ TEST_CASE("frequent strings sketch utf8", "[serialize_for_java]") {
71
+ frequent_items_sketch<std::string> sketch(6);
72
+ sketch.update("абвгд", 1);
73
+ sketch.update("еёжзи", 2);
74
+ sketch.update("йклмн", 3);
75
+ sketch.update("опрст", 4);
76
+ sketch.update("уфхцч", 5);
77
+ sketch.update("шщъыь", 6);
78
+ sketch.update("эюя", 7);
79
+ std::ofstream os("frequent_string_utf8_cpp.sk", std::ios::binary);
80
+ sketch.serialize(os);
81
+ }
82
+
83
+ } /* namespace datasketches */
@@ -70,6 +70,7 @@ TEST_CASE("frequent items: several items, no resize, no purge", "[frequent_items
70
70
  REQUIRE(sketch.get_estimate("b") == 3);
71
71
  REQUIRE(sketch.get_estimate("c") == 2);
72
72
  REQUIRE(sketch.get_estimate("d") == 1);
73
+ REQUIRE(sketch.get_maximum_error() == 0);
73
74
  }
74
75
 
75
76
  TEST_CASE("frequent items: several items, with resize, no purge", "[frequent_items_sketch]") {
@@ -96,6 +97,7 @@ TEST_CASE("frequent items: several items, with resize, no purge", "[frequent_ite
96
97
  REQUIRE(sketch.get_estimate("b") == 3);
97
98
  REQUIRE(sketch.get_estimate("c") == 2);
98
99
  REQUIRE(sketch.get_estimate("d") == 1);
100
+ REQUIRE(sketch.get_maximum_error() == 0);
99
101
  }
100
102
 
101
103
  TEST_CASE("frequent items: estimation mode", "[frequent_items_sketch]") {
@@ -149,6 +151,7 @@ TEST_CASE("frequent items: merge exact mode", "[frequent_items_sketch]") {
149
151
  REQUIRE(sketch1.get_estimate(2) == 3);
150
152
  REQUIRE(sketch1.get_estimate(3) == 2);
151
153
  REQUIRE(sketch1.get_estimate(4) == 1);
154
+ REQUIRE(sketch1.get_maximum_error() == 0);
152
155
  }
153
156
 
154
157
  TEST_CASE("frequent items: merge estimation mode", "[frequent_items_sketch]") {
@@ -199,48 +202,6 @@ TEST_CASE("frequent items: merge estimation mode", "[frequent_items_sketch]") {
199
202
  REQUIRE(9 <= items[1].get_estimate()); // always overestimated
200
203
  }
201
204
 
202
- TEST_CASE("frequent items: deserialize from java long", "[frequent_items_sketch]") {
203
- std::ifstream is;
204
- is.exceptions(std::ios::failbit | std::ios::badbit);
205
- is.open(testBinaryInputPath + "longs_sketch_from_java.sk", std::ios::binary);
206
- auto sketch = frequent_items_sketch<long long>::deserialize(is);
207
- REQUIRE_FALSE(sketch.is_empty());
208
- REQUIRE(sketch.get_total_weight() == 4);
209
- REQUIRE(sketch.get_num_active_items() == 4);
210
- REQUIRE(sketch.get_estimate(1) == 1);
211
- REQUIRE(sketch.get_estimate(2) == 1);
212
- REQUIRE(sketch.get_estimate(3) == 1);
213
- REQUIRE(sketch.get_estimate(4) == 1);
214
- }
215
-
216
- TEST_CASE("frequent items: deserialize from java string", "[frequent_items_sketch]") {
217
- std::ifstream is;
218
- is.exceptions(std::ios::failbit | std::ios::badbit);
219
- is.open(testBinaryInputPath + "items_sketch_string_from_java.sk", std::ios::binary);
220
- auto sketch = frequent_items_sketch<std::string>::deserialize(is);
221
- REQUIRE_FALSE(sketch.is_empty());
222
- REQUIRE(sketch.get_total_weight() == 4);
223
- REQUIRE(sketch.get_num_active_items() == 4);
224
- REQUIRE(sketch.get_estimate("aaaaaaaaaaaaaaaaaaaaaaaaaaaaa") == 1);
225
- REQUIRE(sketch.get_estimate("bbbbbbbbbbbbbbbbbbbbbbbbbbbbb") == 1);
226
- REQUIRE(sketch.get_estimate("ccccccccccccccccccccccccccccc") == 1);
227
- REQUIRE(sketch.get_estimate("ddddddddddddddddddddddddddddd") == 1);
228
- }
229
-
230
- TEST_CASE("frequent items: deserialize from java string, utf-8", "[frequent_items_sketch]") {
231
- std::ifstream is;
232
- is.exceptions(std::ios::failbit | std::ios::badbit);
233
- is.open(testBinaryInputPath + "items_sketch_string_utf8_from_java.sk", std::ios::binary);
234
- auto sketch = frequent_items_sketch<std::string>::deserialize(is);
235
- REQUIRE_FALSE(sketch.is_empty());
236
- REQUIRE(sketch.get_total_weight() == 10);
237
- REQUIRE(sketch.get_num_active_items() == 4);
238
- REQUIRE(sketch.get_estimate("абвгд") == 1);
239
- REQUIRE(sketch.get_estimate("еёжзи") == 2);
240
- REQUIRE(sketch.get_estimate("йклмн") == 3);
241
- REQUIRE(sketch.get_estimate("опрст") == 4);
242
- }
243
-
244
205
  TEST_CASE("frequent items: deserialize long64 stream", "[frequent_items_sketch]") {
245
206
  frequent_items_sketch<long long> sketch1(3);
246
207
  sketch1.update(1, 1);
@@ -169,9 +169,9 @@ CouponList<A>* CouponList<A>::newList(std::istream& is, const A& allocator) {
169
169
  }
170
170
 
171
171
  template<typename A>
172
- vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const {
172
+ auto CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const -> vector_bytes {
173
173
  const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
174
- vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
174
+ vector_bytes byteArr(sketchSizeBytes, 0, getAllocator());
175
175
  uint8_t* bytes = byteArr.data() + header_size_bytes;
176
176
 
177
177
  bytes[hll_constants::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
@@ -33,12 +33,14 @@ class HllSketchImplFactory;
33
33
  template<typename A>
34
34
  class CouponList : public HllSketchImpl<A> {
35
35
  public:
36
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
37
+
36
38
  CouponList(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
37
39
  CouponList(const CouponList& that, target_hll_type tgtHllType);
38
40
 
39
41
  static CouponList* newList(const void* bytes, size_t len, const A& allocator);
40
42
  static CouponList* newList(std::istream& is, const A& allocator);
41
- virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
43
+ virtual vector_bytes serialize(bool compact, unsigned header_size_bytes) const;
42
44
  virtual void serialize(std::ostream& os, bool compact) const;
43
45
 
44
46
  virtual ~CouponList() = default;
@@ -216,9 +216,9 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is, const A& allocator) {
216
216
  }
217
217
 
218
218
  template<typename A>
219
- vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) const {
219
+ auto HllArray<A>::serialize(bool compact, unsigned header_size_bytes) const -> vector_bytes {
220
220
  const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
221
- vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
221
+ vector_bytes byteArr(sketchSizeBytes, 0, getAllocator());
222
222
  uint8_t* bytes = byteArr.data() + header_size_bytes;
223
223
  AuxHashMap<A>* auxHashMap = getAuxHashMap();
224
224
 
@@ -537,7 +537,7 @@ AuxHashMap<A>* HllArray<A>::getAuxHashMap() const {
537
537
  }
538
538
 
539
539
  template<typename A>
540
- const vector_u8<A>& HllArray<A>::getHllArray() const {
540
+ auto HllArray<A>::getHllArray() const -> const vector_bytes& {
541
541
  return hllByteArr_;
542
542
  }
543
543
 
@@ -31,13 +31,15 @@ class AuxHashMap;
31
31
  template<typename A>
32
32
  class HllArray : public HllSketchImpl<A> {
33
33
  public:
34
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
35
+
34
36
  HllArray(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
35
37
  explicit HllArray(const HllArray& other, target_hll_type tgtHllType);
36
38
 
37
39
  static HllArray* newHll(const void* bytes, size_t len, const A& allocator);
38
40
  static HllArray* newHll(std::istream& is, const A& allocator);
39
41
 
40
- virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
42
+ virtual vector_bytes serialize(bool compact, unsigned header_size_bytes) const;
41
43
  virtual void serialize(std::ostream& os, bool compact) const;
42
44
 
43
45
  virtual ~HllArray() = default;
@@ -97,7 +99,7 @@ class HllArray : public HllSketchImpl<A> {
97
99
 
98
100
  virtual A getAllocator() const;
99
101
 
100
- const vector_u8<A>& getHllArray() const;
102
+ const vector_bytes& getHllArray() const;
101
103
 
102
104
  protected:
103
105
  void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue);
@@ -107,7 +109,7 @@ class HllArray : public HllSketchImpl<A> {
107
109
  double hipAccum_;
108
110
  double kxq0_;
109
111
  double kxq1_;
110
- vector_u8<A> hllByteArr_; //init by sub-classes
112
+ vector_bytes hllByteArr_; //init by sub-classes
111
113
  uint8_t curMin_; //always zero for Hll6 and Hll8, only tracked by Hll4Array
112
114
  uint32_t numAtCurMin_; //interpreted as num zeros when curMin == 0
113
115
  bool oooFlag_; //Out-Of-Order Flag
@@ -94,14 +94,14 @@ hll_sketch_alloc<A>::hll_sketch_alloc(HllSketchImpl<A>* that) :
94
94
  {}
95
95
 
96
96
  template<typename A>
97
- hll_sketch_alloc<A> hll_sketch_alloc<A>::operator=(const hll_sketch_alloc<A>& other) {
97
+ hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(const hll_sketch_alloc<A>& other) {
98
98
  sketch_impl->get_deleter()(sketch_impl);
99
99
  sketch_impl = other.sketch_impl->copy();
100
100
  return *this;
101
101
  }
102
102
 
103
103
  template<typename A>
104
- hll_sketch_alloc<A> hll_sketch_alloc<A>::operator=(hll_sketch_alloc<A>&& other) {
104
+ hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(hll_sketch_alloc<A>&& other) {
105
105
  std::swap(sketch_impl, other.sketch_impl);
106
106
  return *this;
107
107
  }
@@ -232,12 +232,12 @@ void hll_sketch_alloc<A>::serialize_updatable(std::ostream& os) const {
232
232
  }
233
233
 
234
234
  template<typename A>
235
- vector_u8<A> hll_sketch_alloc<A>::serialize_compact(unsigned header_size_bytes) const {
235
+ auto hll_sketch_alloc<A>::serialize_compact(unsigned header_size_bytes) const -> vector_bytes {
236
236
  return sketch_impl->serialize(true, header_size_bytes);
237
237
  }
238
238
 
239
239
  template<typename A>
240
- vector_u8<A> hll_sketch_alloc<A>::serialize_updatable() const {
240
+ auto hll_sketch_alloc<A>::serialize_updatable() const -> vector_bytes {
241
241
  return sketch_impl->serialize(false, 0);
242
242
  }
243
243
 
@@ -30,11 +30,13 @@ namespace datasketches {
30
30
  template<typename A>
31
31
  class HllSketchImpl {
32
32
  public:
33
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
34
+
33
35
  HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
34
36
  virtual ~HllSketchImpl();
35
37
 
36
38
  virtual void serialize(std::ostream& os, bool compact) const = 0;
37
- virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const = 0;
39
+ virtual vector_bytes serialize(bool compact, unsigned header_size_bytes) const = 0;
38
40
 
39
41
  virtual HllSketchImpl* copy() const = 0;
40
42
  virtual HllSketchImpl* copyAs(target_hll_type tgtHllType) const = 0;
@@ -124,8 +124,6 @@ public:
124
124
  static uint32_t pair(uint32_t slotNo, uint8_t value);
125
125
  static uint32_t getLow26(uint32_t coupon);
126
126
  static uint8_t getValue(uint32_t coupon);
127
- static double invPow2(uint8_t e);
128
- static uint8_t ceilingPowerOf2(uint32_t n);
129
127
  static uint8_t simpleIntLog2(uint32_t n); // n must be power of 2
130
128
  static uint8_t computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK);
131
129
  static double getRelErr(bool upperBound, bool unioned, uint8_t lgConfigK, uint8_t numStdDev);
@@ -204,16 +202,6 @@ inline uint8_t HllUtil<A>::getValue(uint32_t coupon) {
204
202
  return coupon >> hll_constants::KEY_BITS_26;
205
203
  }
206
204
 
207
- template<typename A>
208
- inline double HllUtil<A>::invPow2(uint8_t e) {
209
- union {
210
- long long longVal;
211
- double doubleVal;
212
- } conv;
213
- conv.longVal = (1023L - e) << 52;
214
- return conv.doubleVal;
215
- }
216
-
217
205
  template<typename A>
218
206
  inline uint8_t HllUtil<A>::simpleIntLog2(uint32_t n) {
219
207
  if (n == 0) {
@@ -30,40 +30,15 @@
30
30
 
31
31
  namespace datasketches {
32
32
 
33
- /**
34
- * This is a high performance implementation of Phillipe Flajolet&#8217;s HLL sketch but with
35
- * significantly improved error behavior. If the ONLY use case for sketching is counting
36
- * uniques and merging, the HLL sketch is a reasonable choice, although the highest performing in terms of accuracy for
37
- * storage space consumed is CPC (Compressed Probabilistic Counting). For large enough counts, this HLL version (with HLL_4) can be 2 to
38
- * 16 times smaller than the Theta sketch family for the same accuracy.
39
- *
40
- * <p>This implementation offers three different types of HLL sketch, each with different
41
- * trade-offs with accuracy, space and performance. These types are specified with the
42
- * {@link TgtHllType} parameter.
43
- *
44
- * <p>In terms of accuracy, all three types, for the same <i>lg_config_k</i>, have the same error
45
- * distribution as a function of <i>n</i>, the number of unique values fed to the sketch.
46
- * The configuration parameter <i>lg_config_k</i> is the log-base-2 of <i>K</i>,
47
- * where <i>K</i> is the number of buckets or slots for the sketch.
48
- *
49
- * <p>During warmup, when the sketch has only received a small number of unique items
50
- * (up to about 10% of <i>K</i>), this implementation leverages a new class of estimator
51
- * algorithms with significantly better accuracy.
52
- *
53
- * <p>This sketch also offers the capability of operating off-heap. Given a WritableMemory object
54
- * created by the user, the sketch will perform all of its updates and internal phase transitions
55
- * in that object, which can actually reside either on-heap or off-heap based on how it is
56
- * configured. In large systems that must update and merge many millions of sketches, having the
57
- * sketch operate off-heap avoids the serialization and deserialization costs of moving sketches
58
- * to and from off-heap memory-mapped files, for example, and eliminates big garbage collection
59
- * delays.
60
- *
61
- * author Jon Malkin
62
- * author Lee Rhodes
63
- * author Kevin Lang
64
- */
33
+ // forward declarations
34
+ template<typename A> class hll_sketch_alloc;
35
+ template<typename A> class hll_union_alloc;
36
+
37
+ /// HLL sketch alias with default allocator
38
+ using hll_sketch = hll_sketch_alloc<std::allocator<uint8_t>>;
39
+ /// HLL union alias with default allocator
40
+ using hll_union = hll_union_alloc<std::allocator<uint8_t>>;
65
41
 
66
-
67
42
  /**
68
43
  * Specifies the target type of HLL sketch to be created. It is a target in that the actual
69
44
  * allocation of the HLL array is deferred until sufficient number of items have been received by
@@ -100,14 +75,41 @@ enum target_hll_type {
100
75
  HLL_8 ///< 8 bits per entry (fastest, fixed size)
101
76
  };
102
77
 
103
- template<typename A>
104
- class HllSketchImpl;
105
-
106
- template<typename A>
107
- class hll_union_alloc;
78
+ /**
79
+ * This is a high performance implementation of Philippe Flajolet's HLL sketch but with
80
+ * significantly improved error behavior. If the ONLY use case for sketching is counting
81
+ * uniques and merging, the HLL sketch is a reasonable choice, although the highest performing in terms of accuracy for
82
+ * storage space consumed is CPC (Compressed Probabilistic Counting). For large enough counts, this HLL version (with HLL_4) can be 2 to
83
+ * 16 times smaller than the Theta sketch family for the same accuracy.
84
+ *
85
+ * <p>This implementation offers three different types of HLL sketch, each with different
86
+ * trade-offs with accuracy, space and performance. These types are specified with the
87
+ * {@link target_hll_type} parameter.
88
+ *
89
+ * <p>In terms of accuracy, all three types, for the same <i>lg_config_k</i>, have the same error
90
+ * distribution as a function of <i>n</i>, the number of unique values fed to the sketch.
91
+ * The configuration parameter <i>lg_config_k</i> is the log-base-2 of <i>K</i>,
92
+ * where <i>K</i> is the number of buckets or slots for the sketch.
93
+ *
94
+ * <p>During warmup, when the sketch has only received a small number of unique items
95
+ * (up to about 10% of <i>K</i>), this implementation leverages a new class of estimator
96
+ * algorithms with significantly better accuracy.
97
+ *
98
+ * <p>This sketch also offers the capability of operating off-heap. Given a WritableMemory object
99
+ * created by the user, the sketch will perform all of its updates and internal phase transitions
100
+ * in that object, which can actually reside either on-heap or off-heap based on how it is
101
+ * configured. In large systems that must update and merge many millions of sketches, having the
102
+ * sketch operate off-heap avoids the serialization and deserialization costs of moving sketches
103
+ * to and from off-heap memory-mapped files, for example, and eliminates big garbage collection
104
+ * delays.
105
+ *
106
+ * author Jon Malkin
107
+ * author Lee Rhodes
108
+ * author Kevin Lang
109
+ */
108
110
 
109
- template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
110
- template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
111
+ // forward declaration
112
+ template<typename A> class HllSketchImpl;
111
113
 
112
114
  template<typename A = std::allocator<uint8_t> >
113
115
  class hll_sketch_alloc final {
@@ -119,27 +121,33 @@ class hll_sketch_alloc final {
119
121
  * @param start_full_size Indicates whether to start in HLL mode,
120
122
  * keeping memory use constant (if HLL_6 or HLL_8) at the cost of
121
123
  * starting out using much more memory
124
+ * @param allocator instance of an Allocator
122
125
  */
123
126
  explicit hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
124
127
 
125
128
  /**
126
129
  * Copy constructor
130
+ * @param that sketch to be copied
127
131
  */
128
132
  hll_sketch_alloc(const hll_sketch_alloc<A>& that);
129
133
 
130
134
  /**
131
135
  * Copy constructor to a new target type
136
+ * @param that sketch to be copied
137
+ * @param tgt_type target_hll_type
132
138
  */
133
139
  hll_sketch_alloc(const hll_sketch_alloc<A>& that, target_hll_type tgt_type);
134
140
 
135
141
  /**
136
142
  * Move constructor
143
+ * @param that sketch to be moved
137
144
  */
138
145
  hll_sketch_alloc(hll_sketch_alloc<A>&& that) noexcept;
139
146
 
140
147
  /**
141
148
  * Reconstructs a sketch from a serialized image on a stream.
142
149
  * @param is An input stream with a binary image of a sketch
150
+ * @param allocator instance of an Allocator
143
151
  */
144
152
  static hll_sketch_alloc deserialize(std::istream& is, const A& allocator = A());
145
153
 
@@ -147,17 +155,26 @@ class hll_sketch_alloc final {
147
155
  * Reconstructs a sketch from a serialized image in a byte array.
148
156
  * @param bytes An input array with a binary image of a sketch
149
157
  * @param len Length of the input array, in bytes
158
+ * @param allocator instance of an Allocator
150
159
  */
151
160
  static hll_sketch_alloc deserialize(const void* bytes, size_t len, const A& allocator = A());
152
161
 
153
162
  //! Class destructor
154
163
  virtual ~hll_sketch_alloc();
155
164
 
156
- //! Copy assignment operator
157
- hll_sketch_alloc operator=(const hll_sketch_alloc<A>& other);
165
+ /**
166
+ * Copy assignment operator
167
+ * @param other sketch to be copied
168
+ * @return reference to this sketch
169
+ */
170
+ hll_sketch_alloc& operator=(const hll_sketch_alloc<A>& other);
158
171
 
159
- //! Move assignment operator
160
- hll_sketch_alloc operator=(hll_sketch_alloc<A>&& other);
172
+ /**
173
+ * Move assignment operator
174
+ * @param other sketch to be moved
175
+ * @return reference to this sketch
176
+ */
177
+ hll_sketch_alloc& operator=(hll_sketch_alloc<A>&& other);
161
178
 
162
179
  /**
163
180
  * Resets the sketch to an empty state in coupon collection mode.
@@ -165,18 +182,22 @@ class hll_sketch_alloc final {
165
182
  */
166
183
  void reset();
167
184
 
168
- typedef vector_u8<A> vector_bytes; // alias for users
185
+ // This is a convenience alias for users
186
+ // The type returned by the following serialize method
187
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
169
188
 
170
189
  /**
171
190
  * Serializes the sketch to a byte array, compacting data structures
172
191
  * where feasible to eliminate unused storage in the serialized image.
173
192
  * @param header_size_bytes Allows for PostgreSQL integration
193
+ * @return serialized sketch in binary form
174
194
  */
175
195
  vector_bytes serialize_compact(unsigned header_size_bytes = 0) const;
176
196
 
177
197
  /**
178
198
  * Serializes the sketch to a byte array, retaining all internal
179
199
  * data structures in their current form.
200
+ * @return serialized sketch in binary form
180
201
  */
181
202
  vector_bytes serialize_updatable() const;
182
203
 
@@ -392,8 +413,6 @@ class hll_sketch_alloc final {
392
413
  bool is_out_of_order_flag() const;
393
414
  bool is_estimation_mode() const;
394
415
 
395
- typedef typename std::allocator_traits<A>::template rebind_alloc<hll_sketch_alloc> AllocHllSketch;
396
-
397
416
  HllSketchImpl<A>* sketch_impl;
398
417
  friend hll_union_alloc<A>;
399
418
  };
@@ -413,8 +432,8 @@ class hll_sketch_alloc final {
413
432
  * <p>Although the API for this union operator parallels many of the methods of the
414
433
  * <i>HllSketch</i>, the behavior of the union operator has some fundamental differences.
415
434
  *
416
- * <p>First, the user cannot specify the #tgt_hll_type as an input parameter.
417
- * Instead, it is specified for the sketch returned with #get_result(tgt_hll_tyope).
435
+ * <p>First, the user cannot specify the #target_hll_type as an input parameter.
436
+ * Instead, it is specified for the sketch returned with #get_result.
418
437
  *
419
438
  * <p>Second, the internal effective value of log-base-2 of <i>k</i> for the union operation can
420
439
  * change dynamically based on the smallest <i>lg_config_k</i> that the union operation has seen.
@@ -423,7 +442,6 @@ class hll_sketch_alloc final {
423
442
  * author Lee Rhodes
424
443
  * author Kevin Lang
425
444
  */
426
-
427
445
  template<typename A = std::allocator<uint8_t> >
428
446
  class hll_union_alloc {
429
447
  public:
@@ -431,6 +449,7 @@ class hll_union_alloc {
431
449
  * Construct an hll_union operator with the given maximum log2 of k.
432
450
  * @param lg_max_k The maximum size, in log2, of k. The value must
433
451
  * be between 7 and 21, inclusive.
452
+ * @param allocator instance of an Allocator
434
453
  */
435
454
  explicit hll_union_alloc(uint8_t lg_max_k, const A& allocator = A());
436
455
 
@@ -495,7 +514,7 @@ class hll_union_alloc {
495
514
 
496
515
  /**
497
516
  * Returns the result of this union operator with the specified
498
- * #tgt_hll_type.
517
+ * #target_hll_type.
499
518
  * @param tgt_type The target_hll_type enum value of the desired result (Default: HLL_4)
500
519
  * @return The result of this union with the specified target_hll_type
501
520
  */
@@ -629,12 +648,6 @@ class hll_union_alloc {
629
648
  hll_sketch_alloc<A> gadget_;
630
649
  };
631
650
 
632
- /// convenience alias for hll_sketch with default allocator
633
- typedef hll_sketch_alloc<> hll_sketch;
634
-
635
- /// convenience alias for hll_union with default allocator
636
- typedef hll_union_alloc<> hll_union;
637
-
638
651
  } // namespace datasketches
639
652
 
640
653
  #include "hll.private.hpp"
@@ -20,7 +20,6 @@ add_executable(hll_test)
20
20
  target_link_libraries(hll_test hll common_test_lib)
21
21
 
22
22
  set_target_properties(hll_test PROPERTIES
23
- CXX_STANDARD 11
24
23
  CXX_STANDARD_REQUIRED YES
25
24
  )
26
25
 
@@ -49,3 +48,17 @@ target_sources(hll_test
49
48
  ToFromByteArrayTest.cpp
50
49
  IsomorphicTest.cpp
51
50
  )
51
+
52
+ if (SERDE_COMPAT)
53
+ target_sources(hll_test
54
+ PRIVATE
55
+ hll_sketch_deserialize_from_java_test.cpp
56
+ )
57
+ endif()
58
+
59
+ if (GENERATE)
60
+ target_sources(hll_test
61
+ PRIVATE
62
+ hll_sketch_serialize_for_java.cpp
63
+ )
64
+ endif()
@@ -53,74 +53,6 @@ TEST_CASE("hll to/from byte array: double serialize", "[hll_byte_array]") {
53
53
  }
54
54
  }
55
55
 
56
- TEST_CASE("hll to/from byte array: deserialize from java", "[hll_byte_array]") {
57
- std::string inputPath;
58
- #ifdef TEST_BINARY_INPUT_PATH
59
- inputPath = TEST_BINARY_INPUT_PATH;
60
- #else
61
- inputPath = "test/";
62
- #endif
63
-
64
- std::ifstream ifs;
65
- ifs.open(inputPath + "list_from_java.sk", std::ios::binary);
66
- hll_sketch sk = hll_sketch::deserialize(ifs);
67
- REQUIRE(sk.is_empty() == false);
68
- REQUIRE(sk.get_lg_config_k() == 8);
69
- REQUIRE(sk.get_lower_bound(1) == 7.0);
70
- REQUIRE(sk.get_estimate() == Approx(7.0).margin(1e-6));
71
- REQUIRE(sk.get_upper_bound(1) == Approx(7.000350).margin(1e-5));
72
- ifs.close();
73
-
74
- ifs.open(inputPath + "compact_set_from_java.sk", std::ios::binary);
75
- sk = hll_sketch::deserialize(ifs);
76
- REQUIRE(sk.is_empty() == false);
77
- REQUIRE(sk.get_lg_config_k() == 8);
78
- REQUIRE(sk.get_lower_bound(1) == 24.0);
79
- REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5));
80
- REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5));
81
- ifs.close();
82
-
83
- ifs.open(inputPath + "updatable_set_from_java.sk", std::ios::binary);
84
- sk = hll_sketch::deserialize(ifs);
85
- REQUIRE(sk.is_empty() == false);
86
- REQUIRE(sk.get_lg_config_k() == 8);
87
- REQUIRE(sk.get_lower_bound(1) == 24.0);
88
- REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5));
89
- REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5));
90
- ifs.close();
91
-
92
-
93
- ifs.open(inputPath + "array6_from_java.sk", std::ios::binary);
94
- sk = hll_sketch::deserialize(ifs);
95
- REQUIRE(sk.is_empty() == false);
96
- REQUIRE(sk.get_lg_config_k() == 8);
97
- REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
98
- REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
99
- REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
100
- ifs.close();
101
-
102
-
103
- ifs.open(inputPath + "compact_array4_from_java.sk", std::ios::binary);
104
- sk = hll_sketch::deserialize(ifs);
105
- REQUIRE(sk.is_empty() == false);
106
- REQUIRE(sk.get_lg_config_k() == 8);
107
- REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
108
- REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
109
- REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
110
-
111
- ifs.close();
112
-
113
-
114
- ifs.open(inputPath + "updatable_array4_from_java.sk", std::ios::binary);
115
- sk = hll_sketch::deserialize(ifs);
116
- REQUIRE(sk.is_empty() == false);
117
- REQUIRE(sk.get_lg_config_k() == 8);
118
- REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
119
- REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
120
- REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
121
- ifs.close();
122
- }
123
-
124
56
  static void checkSketchEquality(hll_sketch& sk1, hll_sketch& sk2) {
125
57
  REQUIRE(sk1.get_lg_config_k() == sk2.get_lg_config_k());
126
58
  REQUIRE(sk1.get_lower_bound(1) == sk2.get_lower_bound(1));