datasketches 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (245)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +3 -3
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +23 -20
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/README.md +1 -3
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +15 -10
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
  21. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  23. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  24. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  25. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +2 -2
  26. data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
  27. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  28. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  29. data/vendor/datasketches-cpp/{python/src/__init__.py → count/CMakeLists.txt} +25 -1
  30. data/vendor/datasketches-cpp/count/include/count_min.hpp +405 -0
  31. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +497 -0
  32. data/vendor/datasketches-cpp/{MANIFEST.in → count/test/CMakeLists.txt} +23 -20
  33. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
  34. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +303 -0
  35. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  36. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  37. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  38. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
  39. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  40. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +14 -11
  41. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  42. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  43. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
  44. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  45. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  46. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  47. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  48. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_serialize_for_java.cpp +38 -0
  49. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  50. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  51. data/vendor/datasketches-cpp/{tox.ini → density/CMakeLists.txt} +24 -8
  52. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +256 -0
  53. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
  54. data/vendor/datasketches-cpp/{python/datasketches/__init__.py → density/test/CMakeLists.txt} +15 -3
  55. data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
  59. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  60. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  63. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  64. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +94 -61
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +20 -8
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
  76. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
  77. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -18
  78. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
  79. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
  80. data/vendor/datasketches-cpp/hll/include/hll.hpp +79 -65
  81. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  82. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
  83. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  84. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  85. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  86. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
  87. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  88. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +79 -53
  89. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +61 -132
  90. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  91. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  92. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  93. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +5 -40
  94. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +76 -54
  95. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +66 -136
  96. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  97. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  98. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  99. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +15 -39
  100. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  101. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -4
  102. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +105 -26
  103. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +50 -111
  104. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  105. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  106. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  107. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  108. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  109. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  110. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  111. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  112. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  113. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +89 -32
  114. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +33 -19
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +13 -10
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +23 -19
  117. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  118. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  119. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  120. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  121. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  122. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  123. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -51
  124. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  125. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  126. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -20
  127. data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
  128. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
  129. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  130. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  131. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
  132. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
  133. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  134. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  135. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  137. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  140. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  141. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  142. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +180 -33
  143. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
  144. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  145. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +10 -10
  146. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  147. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +21 -6
  148. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +13 -3
  149. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +15 -1
  150. data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
  151. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  152. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  153. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +39 -188
  154. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
  155. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  157. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  158. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  159. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  160. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection_impl.hpp +31 -0
  161. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  162. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  163. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  164. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  165. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  166. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  167. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  168. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  169. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
  170. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  171. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  172. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  173. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  174. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  175. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  176. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  177. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  178. data/vendor/datasketches-cpp/{python/src/datasketches.cpp → tuple/test/tuple_sketch_serialize_for_java.cpp} +16 -30
  179. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  180. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  181. metadata +61 -79
  182. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  183. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  184. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  185. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  188. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  189. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  190. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  191. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  192. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  193. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -81
  194. data/vendor/datasketches-cpp/python/README.md +0 -85
  195. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -104
  196. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  197. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  198. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  199. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  200. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  201. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  202. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  203. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -90
  204. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -128
  205. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -134
  206. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -210
  207. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  208. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -111
  209. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -204
  210. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -215
  211. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -172
  212. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  213. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  214. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  215. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  216. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -110
  217. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -130
  218. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -125
  219. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -126
  220. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -126
  221. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -146
  222. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  223. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -125
  224. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  225. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  230. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  231. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  232. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  233. data/vendor/datasketches-cpp/setup.py +0 -110
  234. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  238. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  239. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  240. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  241. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  242. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  243. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  244. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  245. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -94,14 +94,14 @@ hll_sketch_alloc<A>::hll_sketch_alloc(HllSketchImpl<A>* that) :
94
94
  {}
95
95
 
96
96
  template<typename A>
97
- hll_sketch_alloc<A> hll_sketch_alloc<A>::operator=(const hll_sketch_alloc<A>& other) {
97
+ hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(const hll_sketch_alloc<A>& other) {
98
98
  sketch_impl->get_deleter()(sketch_impl);
99
99
  sketch_impl = other.sketch_impl->copy();
100
100
  return *this;
101
101
  }
102
102
 
103
103
  template<typename A>
104
- hll_sketch_alloc<A> hll_sketch_alloc<A>::operator=(hll_sketch_alloc<A>&& other) {
104
+ hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(hll_sketch_alloc<A>&& other) {
105
105
  std::swap(sketch_impl, other.sketch_impl);
106
106
  return *this;
107
107
  }
@@ -232,12 +232,12 @@ void hll_sketch_alloc<A>::serialize_updatable(std::ostream& os) const {
232
232
  }
233
233
 
234
234
  template<typename A>
235
- vector_u8<A> hll_sketch_alloc<A>::serialize_compact(unsigned header_size_bytes) const {
235
+ auto hll_sketch_alloc<A>::serialize_compact(unsigned header_size_bytes) const -> vector_bytes {
236
236
  return sketch_impl->serialize(true, header_size_bytes);
237
237
  }
238
238
 
239
239
  template<typename A>
240
- vector_u8<A> hll_sketch_alloc<A>::serialize_updatable() const {
240
+ auto hll_sketch_alloc<A>::serialize_updatable() const -> vector_bytes {
241
241
  return sketch_impl->serialize(false, 0);
242
242
  }
243
243
 
@@ -30,11 +30,13 @@ namespace datasketches {
30
30
  template<typename A>
31
31
  class HllSketchImpl {
32
32
  public:
33
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
34
+
33
35
  HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
34
36
  virtual ~HllSketchImpl();
35
37
 
36
38
  virtual void serialize(std::ostream& os, bool compact) const = 0;
37
- virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const = 0;
39
+ virtual vector_bytes serialize(bool compact, unsigned header_size_bytes) const = 0;
38
40
 
39
41
  virtual HllSketchImpl* copy() const = 0;
40
42
  virtual HllSketchImpl* copyAs(target_hll_type tgtHllType) const = 0;
@@ -136,38 +136,20 @@ HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool st
136
136
 
137
137
  template<typename A>
138
138
  Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllArr) {
139
- const uint8_t lgConfigK = srcHllArr.getLgConfigK();
140
139
  using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
141
- Hll4Array<A>* hll4Array = new (Hll4Alloc(srcHllArr.getAllocator()).allocate(1))
142
- Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
143
- hll4Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
144
- hll4Array->mergeHll(srcHllArr);
145
- hll4Array->putHipAccum(srcHllArr.getHipAccum());
146
- return hll4Array;
140
+ return new (Hll4Alloc(srcHllArr.getAllocator()).allocate(1)) Hll4Array<A>(srcHllArr);
147
141
  }
148
142
 
149
143
  template<typename A>
150
144
  Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllArr) {
151
- const uint8_t lgConfigK = srcHllArr.getLgConfigK();
152
145
  using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
153
- Hll6Array<A>* hll6Array = new (Hll6Alloc(srcHllArr.getAllocator()).allocate(1))
154
- Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
155
- hll6Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
156
- hll6Array->mergeHll(srcHllArr);
157
- hll6Array->putHipAccum(srcHllArr.getHipAccum());
158
- return hll6Array;
146
+ return new (Hll6Alloc(srcHllArr.getAllocator()).allocate(1)) Hll6Array<A>(srcHllArr);
159
147
  }
160
148
 
161
149
  template<typename A>
162
150
  Hll8Array<A>* HllSketchImplFactory<A>::convertToHll8(const HllArray<A>& srcHllArr) {
163
- const uint8_t lgConfigK = srcHllArr.getLgConfigK();
164
151
  using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
165
- Hll8Array<A>* hll8Array = new (Hll8Alloc(srcHllArr.getAllocator()).allocate(1))
166
- Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
167
- hll8Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
168
- hll8Array->mergeHll(srcHllArr);
169
- hll8Array->putHipAccum(srcHllArr.getHipAccum());
170
- return hll8Array;
152
+ return new (Hll8Alloc(srcHllArr.getAllocator()).allocate(1)) Hll8Array<A>(srcHllArr);
171
153
  }
172
154
 
173
155
  }
@@ -131,21 +131,29 @@ void hll_union_alloc<A>::coupon_update(uint32_t coupon) {
131
131
 
132
132
  template<typename A>
133
133
  double hll_union_alloc<A>::get_estimate() const {
134
+ if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
135
+ static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
134
136
  return gadget_.get_estimate();
135
137
  }
136
138
 
137
139
  template<typename A>
138
140
  double hll_union_alloc<A>::get_composite_estimate() const {
141
+ if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
142
+ static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
139
143
  return gadget_.get_composite_estimate();
140
144
  }
141
145
 
142
146
  template<typename A>
143
147
  double hll_union_alloc<A>::get_lower_bound(uint8_t num_std_dev) const {
148
+ if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
149
+ static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
144
150
  return gadget_.get_lower_bound(num_std_dev);
145
151
  }
146
152
 
147
153
  template<typename A>
148
154
  double hll_union_alloc<A>::get_upper_bound(uint8_t num_std_dev) const {
155
+ if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
156
+ static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
149
157
  return gadget_.get_upper_bound(num_std_dev);
150
158
  }
151
159
 
@@ -124,8 +124,6 @@ public:
124
124
  static uint32_t pair(uint32_t slotNo, uint8_t value);
125
125
  static uint32_t getLow26(uint32_t coupon);
126
126
  static uint8_t getValue(uint32_t coupon);
127
- static double invPow2(uint8_t e);
128
- static uint8_t ceilingPowerOf2(uint32_t n);
129
127
  static uint8_t simpleIntLog2(uint32_t n); // n must be power of 2
130
128
  static uint8_t computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK);
131
129
  static double getRelErr(bool upperBound, bool unioned, uint8_t lgConfigK, uint8_t numStdDev);
@@ -152,12 +150,6 @@ inline void HllUtil<A>::hash(const void* key, size_t keyLen, uint64_t seed, Hash
152
150
  MurmurHash3_x64_128(key, keyLen, seed, result);
153
151
  }
154
152
 
155
- template<typename A>
156
- inline double HllUtil<A>::getRelErr(bool upperBound, bool unioned,
157
- uint8_t lgConfigK, uint8_t numStdDev) {
158
- return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
159
- }
160
-
161
153
  template<typename A>
162
154
  inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
163
155
  if ((lgK >= hll_constants::MIN_LOG_K) && (lgK <= hll_constants::MAX_LOG_K)) {
@@ -167,6 +159,20 @@ inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
167
159
  }
168
160
  }
169
161
 
162
+ template<typename A>
163
+ inline double HllUtil<A>::getRelErr(bool upperBound, bool unioned,
164
+ uint8_t lgConfigK, uint8_t numStdDev) {
165
+ checkLgK(lgConfigK);
166
+ if (lgConfigK > 12) {
167
+ const double rseFactor = unioned ?
168
+ hll_constants::HLL_NON_HIP_RSE_FACTOR : hll_constants::HLL_HIP_RSE_FACTOR;
169
+ const uint32_t configK = 1 << lgConfigK;
170
+ return (upperBound ? -1 : 1) * (numStdDev * rseFactor) / sqrt(configK);
171
+ } else {
172
+ return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
173
+ }
174
+ }
175
+
170
176
  template<typename A>
171
177
  inline void HllUtil<A>::checkMemSize(uint64_t minBytes, uint64_t capBytes) {
172
178
  if (capBytes < minBytes) {
@@ -196,16 +202,6 @@ inline uint8_t HllUtil<A>::getValue(uint32_t coupon) {
196
202
  return coupon >> hll_constants::KEY_BITS_26;
197
203
  }
198
204
 
199
- template<typename A>
200
- inline double HllUtil<A>::invPow2(uint8_t e) {
201
- union {
202
- long long longVal;
203
- double doubleVal;
204
- } conv;
205
- conv.longVal = (1023L - e) << 52;
206
- return conv.doubleVal;
207
- }
208
-
209
205
  template<typename A>
210
206
  inline uint8_t HllUtil<A>::simpleIntLog2(uint32_t n) {
211
207
  if (n == 0) {
@@ -47,7 +47,7 @@ bool coupon_iterator<A>::operator!=(const coupon_iterator& other) const {
47
47
  }
48
48
 
49
49
  template<typename A>
50
- uint32_t coupon_iterator<A>::operator*() const {
50
+ auto coupon_iterator<A>::operator*() const -> reference {
51
51
  return array_[index_];
52
52
  }
53
53
 
@@ -23,12 +23,18 @@
23
23
  namespace datasketches {
24
24
 
25
25
  template<typename A>
26
- class coupon_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
26
+ class coupon_iterator {
27
27
  public:
28
+ using iterator_category = std::input_iterator_tag;
29
+ using value_type = uint32_t;
30
+ using difference_type = void;
31
+ using pointer = uint32_t*;
32
+ using reference = uint32_t;
33
+
28
34
  coupon_iterator(const uint32_t* array, size_t array_slze, size_t index, bool all);
29
35
  coupon_iterator& operator++();
30
36
  bool operator!=(const coupon_iterator& other) const;
31
- uint32_t operator*() const;
37
+ reference operator*() const;
32
38
  private:
33
39
  const uint32_t* array_;
34
40
  size_t array_size_;
@@ -23,46 +23,22 @@
23
23
  #include "common_defs.hpp"
24
24
  #include "HllUtil.hpp"
25
25
 
26
- #include <memory>
27
26
  #include <iostream>
27
+ #include <memory>
28
+ #include <string>
28
29
  #include <vector>
29
30
 
30
31
  namespace datasketches {
31
32
 
32
- /**
33
- * This is a high performance implementation of Phillipe Flajolet&#8217;s HLL sketch but with
34
- * significantly improved error behavior. If the ONLY use case for sketching is counting
35
- * uniques and merging, the HLL sketch is a reasonable choice, although the highest performing in terms of accuracy for
36
- * storage space consumed is CPC (Compressed Probabilistic Counting). For large enough counts, this HLL version (with HLL_4) can be 2 to
37
- * 16 times smaller than the Theta sketch family for the same accuracy.
38
- *
39
- * <p>This implementation offers three different types of HLL sketch, each with different
40
- * trade-offs with accuracy, space and performance. These types are specified with the
41
- * {@link TgtHllType} parameter.
42
- *
43
- * <p>In terms of accuracy, all three types, for the same <i>lg_config_k</i>, have the same error
44
- * distribution as a function of <i>n</i>, the number of unique values fed to the sketch.
45
- * The configuration parameter <i>lg_config_k</i> is the log-base-2 of <i>K</i>,
46
- * where <i>K</i> is the number of buckets or slots for the sketch.
47
- *
48
- * <p>During warmup, when the sketch has only received a small number of unique items
49
- * (up to about 10% of <i>K</i>), this implementation leverages a new class of estimator
50
- * algorithms with significantly better accuracy.
51
- *
52
- * <p>This sketch also offers the capability of operating off-heap. Given a WritableMemory object
53
- * created by the user, the sketch will perform all of its updates and internal phase transitions
54
- * in that object, which can actually reside either on-heap or off-heap based on how it is
55
- * configured. In large systems that must update and merge many millions of sketches, having the
56
- * sketch operate off-heap avoids the serialization and deserialization costs of moving sketches
57
- * to and from off-heap memory-mapped files, for example, and eliminates big garbage collection
58
- * delays.
59
- *
60
- * author Jon Malkin
61
- * author Lee Rhodes
62
- * author Kevin Lang
63
- */
33
+ // forward declarations
34
+ template<typename A> class hll_sketch_alloc;
35
+ template<typename A> class hll_union_alloc;
36
+
37
+ /// HLL sketch alias with default allocator
38
+ using hll_sketch = hll_sketch_alloc<std::allocator<uint8_t>>;
39
+ /// HLL union alias with default allocator
40
+ using hll_union = hll_union_alloc<std::allocator<uint8_t>>;
64
41
 
65
-
66
42
  /**
67
43
  * Specifies the target type of HLL sketch to be created. It is a target in that the actual
68
44
  * allocation of the HLL array is deferred until sufficient number of items have been received by
@@ -99,14 +75,41 @@ enum target_hll_type {
99
75
  HLL_8 ///< 8 bits per entry (fastest, fixed size)
100
76
  };
101
77
 
102
- template<typename A>
103
- class HllSketchImpl;
104
-
105
- template<typename A>
106
- class hll_union_alloc;
78
+ /**
79
+ * This is a high performance implementation of Philippe Flajolet's HLL sketch but with
80
+ * significantly improved error behavior. If the ONLY use case for sketching is counting
81
+ * uniques and merging, the HLL sketch is a reasonable choice, although the highest performing in terms of accuracy for
82
+ * storage space consumed is CPC (Compressed Probabilistic Counting). For large enough counts, this HLL version (with HLL_4) can be 2 to
83
+ * 16 times smaller than the Theta sketch family for the same accuracy.
84
+ *
85
+ * <p>This implementation offers three different types of HLL sketch, each with different
86
+ * trade-offs with accuracy, space and performance. These types are specified with the
87
+ * {@link target_hll_type} parameter.
88
+ *
89
+ * <p>In terms of accuracy, all three types, for the same <i>lg_config_k</i>, have the same error
90
+ * distribution as a function of <i>n</i>, the number of unique values fed to the sketch.
91
+ * The configuration parameter <i>lg_config_k</i> is the log-base-2 of <i>K</i>,
92
+ * where <i>K</i> is the number of buckets or slots for the sketch.
93
+ *
94
+ * <p>During warmup, when the sketch has only received a small number of unique items
95
+ * (up to about 10% of <i>K</i>), this implementation leverages a new class of estimator
96
+ * algorithms with significantly better accuracy.
97
+ *
98
+ * <p>This sketch also offers the capability of operating off-heap. Given a WritableMemory object
99
+ * created by the user, the sketch will perform all of its updates and internal phase transitions
100
+ * in that object, which can actually reside either on-heap or off-heap based on how it is
101
+ * configured. In large systems that must update and merge many millions of sketches, having the
102
+ * sketch operate off-heap avoids the serialization and deserialization costs of moving sketches
103
+ * to and from off-heap memory-mapped files, for example, and eliminates big garbage collection
104
+ * delays.
105
+ *
106
+ * author Jon Malkin
107
+ * author Lee Rhodes
108
+ * author Kevin Lang
109
+ */
107
110
 
108
- template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
109
- template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
111
+ // forward declaration
112
+ template<typename A> class HllSketchImpl;
110
113
 
111
114
  template<typename A = std::allocator<uint8_t> >
112
115
  class hll_sketch_alloc final {
@@ -118,45 +121,60 @@ class hll_sketch_alloc final {
118
121
  * @param start_full_size Indicates whether to start in HLL mode,
119
122
  * keeping memory use constant (if HLL_6 or HLL_8) at the cost of
120
123
  * starting out using much more memory
124
+ * @param allocator instance of an Allocator
121
125
  */
122
126
  explicit hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
123
127
 
124
128
  /**
125
129
  * Copy constructor
130
+ * @param that sketch to be copied
126
131
  */
127
132
  hll_sketch_alloc(const hll_sketch_alloc<A>& that);
128
133
 
129
134
  /**
130
135
  * Copy constructor to a new target type
136
+ * @param that sketch to be copied
137
+ * @param tgt_type target_hll_type
131
138
  */
132
139
  hll_sketch_alloc(const hll_sketch_alloc<A>& that, target_hll_type tgt_type);
133
140
 
134
141
  /**
135
142
  * Move constructor
143
+ * @param that sketch to be moved
136
144
  */
137
145
  hll_sketch_alloc(hll_sketch_alloc<A>&& that) noexcept;
138
146
 
139
147
  /**
140
148
  * Reconstructs a sketch from a serialized image on a stream.
141
149
  * @param is An input stream with a binary image of a sketch
150
+ * @param allocator instance of an Allocator
142
151
  */
143
152
  static hll_sketch_alloc deserialize(std::istream& is, const A& allocator = A());
144
153
 
145
154
  /**
146
155
  * Reconstructs a sketch from a serialized image in a byte array.
147
- * @param is bytes An input array with a binary image of a sketch
156
+ * @param bytes An input array with a binary image of a sketch
148
157
  * @param len Length of the input array, in bytes
158
+ * @param allocator instance of an Allocator
149
159
  */
150
160
  static hll_sketch_alloc deserialize(const void* bytes, size_t len, const A& allocator = A());
151
161
 
152
162
  //! Class destructor
153
163
  virtual ~hll_sketch_alloc();
154
164
 
155
- //! Copy assignment operator
156
- hll_sketch_alloc operator=(const hll_sketch_alloc<A>& other);
165
+ /**
166
+ * Copy assignment operator
167
+ * @param other sketch to be copied
168
+ * @return reference to this sketch
169
+ */
170
+ hll_sketch_alloc& operator=(const hll_sketch_alloc<A>& other);
157
171
 
158
- //! Move assignment operator
159
- hll_sketch_alloc operator=(hll_sketch_alloc<A>&& other);
172
+ /**
173
+ * Move assignment operator
174
+ * @param other sketch to be moved
175
+ * @return reference to this sketch
176
+ */
177
+ hll_sketch_alloc& operator=(hll_sketch_alloc<A>&& other);
160
178
 
161
179
  /**
162
180
  * Resets the sketch to an empty state in coupon collection mode.
@@ -164,18 +182,22 @@ class hll_sketch_alloc final {
164
182
  */
165
183
  void reset();
166
184
 
167
- typedef vector_u8<A> vector_bytes; // alias for users
185
+ // This is a convenience alias for users
186
+ // The type returned by the following serialize method
187
+ using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
168
188
 
169
189
  /**
170
190
  * Serializes the sketch to a byte array, compacting data structures
171
191
  * where feasible to eliminate unused storage in the serialized image.
172
192
  * @param header_size_bytes Allows for PostgreSQL integration
193
+ * @return serialized sketch in binary form
173
194
  */
174
195
  vector_bytes serialize_compact(unsigned header_size_bytes = 0) const;
175
196
 
176
197
  /**
177
198
  * Serializes the sketch to a byte array, retaining all internal
178
199
  * data structures in their current form.
200
+ * @return serialized sketch in binary form
179
201
  */
180
202
  vector_bytes serialize_updatable() const;
181
203
 
@@ -197,7 +219,7 @@ class hll_sketch_alloc final {
197
219
  * Human readable summary with optional detail
198
220
  * @param summary if true, output the sketch summary
199
221
  * @param detail if true, output the internal data array
200
- * @param auxDetail if true, output the internal Aux array, if it exists.
222
+ * @param aux_detail if true, output the internal Aux array, if it exists.
201
223
  * @param all if true, outputs all entries including empty ones
202
224
  * @return human readable string with optional detail.
203
225
  */
@@ -358,7 +380,7 @@ class hll_sketch_alloc final {
358
380
  * value can be exceeded in extremely rare cases. If exceeded, it
359
381
  * will be larger by only a few percent.
360
382
  *
361
- * @param lg_config_k The Log2 of K for the target HLL sketch. This value must be
383
+ * @param lg_k The Log2 of K for the target HLL sketch. This value must be
362
384
  * between 4 and 21 inclusively.
363
385
  * @param tgt_type the desired Hll type
364
386
  * @return the maximum size in bytes that this sketch can grow to.
@@ -391,8 +413,6 @@ class hll_sketch_alloc final {
391
413
  bool is_out_of_order_flag() const;
392
414
  bool is_estimation_mode() const;
393
415
 
394
- typedef typename std::allocator_traits<A>::template rebind_alloc<hll_sketch_alloc> AllocHllSketch;
395
-
396
416
  HllSketchImpl<A>* sketch_impl;
397
417
  friend hll_union_alloc<A>;
398
418
  };
@@ -412,8 +432,8 @@ class hll_sketch_alloc final {
412
432
  * <p>Although the API for this union operator parallels many of the methods of the
413
433
  * <i>HllSketch</i>, the behavior of the union operator has some fundamental differences.
414
434
  *
415
- * <p>First, the user cannot specify the #tgt_hll_type as an input parameter.
416
- * Instead, it is specified for the sketch returned with #get_result(tgt_hll_tyope).
435
+ * <p>First, the user cannot specify the #target_hll_type as an input parameter.
436
+ * Instead, it is specified for the sketch returned with #get_result.
417
437
  *
418
438
  * <p>Second, the internal effective value of log-base-2 of <i>k</i> for the union operation can
419
439
  * change dynamically based on the smallest <i>lg_config_k</i> that the union operation has seen.
@@ -422,7 +442,6 @@ class hll_sketch_alloc final {
422
442
  * author Lee Rhodes
423
443
  * author Kevin Lang
424
444
  */
425
-
426
445
  template<typename A = std::allocator<uint8_t> >
427
446
  class hll_union_alloc {
428
447
  public:
@@ -430,6 +449,7 @@ class hll_union_alloc {
430
449
  * Construct an hll_union operator with the given maximum log2 of k.
431
450
  * @param lg_max_k The maximum size, in log2, of k. The value must
432
451
  * be between 7 and 21, inclusive.
452
+ * @param allocator instance of an Allocator
433
453
  */
434
454
  explicit hll_union_alloc(uint8_t lg_max_k, const A& allocator = A());
435
455
 
@@ -494,21 +514,21 @@ class hll_union_alloc {
494
514
 
495
515
  /**
496
516
  * Returns the result of this union operator with the specified
497
- * #tgt_hll_type.
498
- * @param The tgt_hll_type enum value of the desired result (Default: HLL_4)
517
+ * #target_hll_type.
518
+ * @param tgt_type The target_hll_type enum value of the desired result (Default: HLL_4)
499
519
  * @return The result of this union with the specified target_hll_type
500
520
  */
501
521
  hll_sketch_alloc<A> get_result(target_hll_type tgt_type = HLL_4) const;
502
522
 
503
523
  /**
504
524
  * Update this union operator with the given sketch.
505
- * @param The given sketch.
525
+ * @param sketch The given sketch.
506
526
  */
507
527
  void update(const hll_sketch_alloc<A>& sketch);
508
528
 
509
529
  /**
510
530
  * Update this union operator with the given temporary sketch.
511
- * @param The given sketch.
531
+ * @param sketch The given sketch.
512
532
  */
513
533
  void update(hll_sketch_alloc<A>&& sketch);
514
534
 
@@ -608,7 +628,7 @@ class hll_union_alloc {
608
628
  * perform the union. This may involve swapping, down-sampling, transforming, and / or
609
629
  * copying one of the arguments and may completely replace the internals of the union.
610
630
  *
611
- * @param incoming_impl the given incoming sketch, which may not be modified.
631
+ * @param sketch the given incoming sketch, which may not be modified.
612
632
  * @param lg_max_k the maximum value of log2 K for this union.
613
633
  */
614
634
  inline void union_impl(const hll_sketch_alloc<A>& sketch, uint8_t lg_max_k);
@@ -628,12 +648,6 @@ class hll_union_alloc {
628
648
  hll_sketch_alloc<A> gadget_;
629
649
  };
630
650
 
631
- /// convenience alias for hll_sketch with default allocator
632
- typedef hll_sketch_alloc<> hll_sketch;
633
-
634
- /// convenience alias for hll_union with default allocator
635
- typedef hll_union_alloc<> hll_union;
636
-
637
651
  } // namespace datasketches
638
652
 
639
653
  #include "hll.private.hpp"
@@ -20,7 +20,6 @@ add_executable(hll_test)
20
20
  target_link_libraries(hll_test hll common_test_lib)
21
21
 
22
22
  set_target_properties(hll_test PROPERTIES
23
- CXX_STANDARD 11
24
23
  CXX_STANDARD_REQUIRED YES
25
24
  )
26
25
 
@@ -49,3 +48,17 @@ target_sources(hll_test
49
48
  ToFromByteArrayTest.cpp
50
49
  IsomorphicTest.cpp
51
50
  )
51
+
52
+ if (SERDE_COMPAT)
53
+ target_sources(hll_test
54
+ PRIVATE
55
+ hll_sketch_deserialize_from_java_test.cpp
56
+ )
57
+ endif()
58
+
59
+ if (GENERATE)
60
+ target_sources(hll_test
61
+ PRIVATE
62
+ hll_sketch_serialize_for_java.cpp
63
+ )
64
+ endif()
@@ -53,11 +53,16 @@ static void basicUnion(uint64_t n1, uint64_t n2,
53
53
  v += n2;
54
54
 
55
55
  hll_union u(lgMaxK);
56
- u.update(std::move(h1));
56
+ u.update(h1);
57
57
  u.update(h2);
58
58
 
59
59
  hll_sketch result = u.get_result(resultType);
60
60
 
61
+ // ensure we check a direct union estimate, without first calling get_result()
62
+ u.reset();
63
+ u.update(std::move(h1));
64
+ u.update(h2);
65
+
61
66
  // force non-HIP estimates to avoid issues with in- vs out-of-order
62
67
  double uEst = result.get_composite_estimate();
63
68
  double uUb = result.get_upper_bound(2);
@@ -74,6 +79,7 @@ static void basicUnion(uint64_t n1, uint64_t n2,
74
79
  REQUIRE((uEst - uLb) >= 0.0);
75
80
 
76
81
  REQUIRE(controlEst == uEst);
82
+ REQUIRE(controlEst == u.get_composite_estimate());
77
83
  }
78
84
 
79
85
  /**
@@ -53,74 +53,6 @@ TEST_CASE("hll to/from byte array: double serialize", "[hll_byte_array]") {
53
53
  }
54
54
  }
55
55
 
56
- TEST_CASE("hll to/from byte array: deserialize from java", "[hll_byte_array]") {
57
- std::string inputPath;
58
- #ifdef TEST_BINARY_INPUT_PATH
59
- inputPath = TEST_BINARY_INPUT_PATH;
60
- #else
61
- inputPath = "test/";
62
- #endif
63
-
64
- std::ifstream ifs;
65
- ifs.open(inputPath + "list_from_java.sk", std::ios::binary);
66
- hll_sketch sk = hll_sketch::deserialize(ifs);
67
- REQUIRE(sk.is_empty() == false);
68
- REQUIRE(sk.get_lg_config_k() == 8);
69
- REQUIRE(sk.get_lower_bound(1) == 7.0);
70
- REQUIRE(sk.get_estimate() == Approx(7.0).margin(1e-6));
71
- REQUIRE(sk.get_upper_bound(1) == Approx(7.000350).margin(1e-5));
72
- ifs.close();
73
-
74
- ifs.open(inputPath + "compact_set_from_java.sk", std::ios::binary);
75
- sk = hll_sketch::deserialize(ifs);
76
- REQUIRE(sk.is_empty() == false);
77
- REQUIRE(sk.get_lg_config_k() == 8);
78
- REQUIRE(sk.get_lower_bound(1) == 24.0);
79
- REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5));
80
- REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5));
81
- ifs.close();
82
-
83
- ifs.open(inputPath + "updatable_set_from_java.sk", std::ios::binary);
84
- sk = hll_sketch::deserialize(ifs);
85
- REQUIRE(sk.is_empty() == false);
86
- REQUIRE(sk.get_lg_config_k() == 8);
87
- REQUIRE(sk.get_lower_bound(1) == 24.0);
88
- REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5));
89
- REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5));
90
- ifs.close();
91
-
92
-
93
- ifs.open(inputPath + "array6_from_java.sk", std::ios::binary);
94
- sk = hll_sketch::deserialize(ifs);
95
- REQUIRE(sk.is_empty() == false);
96
- REQUIRE(sk.get_lg_config_k() == 8);
97
- REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
98
- REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
99
- REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
100
- ifs.close();
101
-
102
-
103
- ifs.open(inputPath + "compact_array4_from_java.sk", std::ios::binary);
104
- sk = hll_sketch::deserialize(ifs);
105
- REQUIRE(sk.is_empty() == false);
106
- REQUIRE(sk.get_lg_config_k() == 8);
107
- REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
108
- REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
109
- REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
110
-
111
- ifs.close();
112
-
113
-
114
- ifs.open(inputPath + "updatable_array4_from_java.sk", std::ios::binary);
115
- sk = hll_sketch::deserialize(ifs);
116
- REQUIRE(sk.is_empty() == false);
117
- REQUIRE(sk.get_lg_config_k() == 8);
118
- REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
119
- REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
120
- REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
121
- ifs.close();
122
- }
123
-
124
56
  static void checkSketchEquality(hll_sketch& sk1, hll_sketch& sk2) {
125
57
  REQUIRE(sk1.get_lg_config_k() == sk2.get_lg_config_k());
126
58
  REQUIRE(sk1.get_lower_bound(1) == sk2.get_lower_bound(1));