datasketches 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,43 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _INTARRAYPAIRITERATOR_HPP_
21
+ #define _INTARRAYPAIRITERATOR_HPP_
22
+
23
+ namespace datasketches {
24
+
25
+ template<typename A> // A is not referenced by any declaration inside this class body
26
+ class coupon_iterator: public std::iterator<std::input_iterator_tag, uint32_t> { // input iterator whose value type is uint32_t; NOTE(review): std::iterator is deprecated since C++17
27
+ public:
28
+ coupon_iterator(const int* array, size_t array_size, size_t index, bool all); // parameters mirror the private members below; NOTE(review): presumably defined in coupon_iterator-internal.hpp, which this header includes - confirm
29
+ coupon_iterator& operator++(); // pre-increment; returns a reference to a coupon_iterator
30
+ bool operator!=(const coupon_iterator& other) const; // inequality comparison against another iterator
31
+ uint32_t operator*() const; // dereference; yields a uint32_t by value
32
+ private:
33
+ const int* array; // pointer to read-only int data
34
+ size_t array_size; // NOTE(review): presumably the number of ints reachable through array - confirm
35
+ size_t index; // NOTE(review): presumably the current position within array - confirm
36
+ bool all; // NOTE(review): presumably selects visiting every slot, including empty ones - confirm
37
+ };
38
+
39
+ }
40
+
41
+ #include "coupon_iterator-internal.hpp"
42
+
43
+ #endif /* _INTARRAYPAIRITERATOR_HPP_ */
@@ -0,0 +1,669 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLL_HPP_
21
+ #define _HLL_HPP_
22
+
23
+ #include "common_defs.hpp"
24
+ #include "HllUtil.hpp"
25
+
26
+ #include <memory>
27
+ #include <iostream>
28
+ #include <vector>
29
+
30
+ namespace datasketches {
31
+
32
+ /**
33
+ * This is a high performance implementation of Philippe Flajolet's HLL sketch but with
34
+ * significantly improved error behavior. If the ONLY use case for sketching is counting
35
+ * uniques and merging, the HLL sketch is a reasonable choice, although the highest performing in terms of accuracy for
36
+ * storage space consumed is CPC (Compressed Probabilistic Counting). For large enough counts, this HLL version (with HLL_4) can be 2 to
37
+ * 16 times smaller than the Theta sketch family for the same accuracy.
38
+ *
39
+ * <p>This implementation offers three different types of HLL sketch, each with different
40
+ * trade-offs with accuracy, space and performance. These types are specified with the
41
+ * #target_hll_type parameter.
42
+ *
43
+ * <p>In terms of accuracy, all three types, for the same <i>lg_config_k</i>, have the same error
44
+ * distribution as a function of <i>n</i>, the number of unique values fed to the sketch.
45
+ * The configuration parameter <i>lg_config_k</i> is the log-base-2 of <i>K</i>,
46
+ * where <i>K</i> is the number of buckets or slots for the sketch.
47
+ *
48
+ * <p>During warmup, when the sketch has only received a small number of unique items
49
+ * (up to about 10% of <i>K</i>), this implementation leverages a new class of estimator
50
+ * algorithms with significantly better accuracy.
51
+ *
52
+ * <p>This sketch also offers the capability of operating off-heap. Given a WritableMemory object
53
+ * created by the user, the sketch will perform all of its updates and internal phase transitions
54
+ * in that object, which can actually reside either on-heap or off-heap based on how it is
55
+ * configured. In large systems that must update and merge many millions of sketches, having the
56
+ * sketch operate off-heap avoids the serialization and deserialization costs of moving sketches
57
+ * to and from off-heap memory-mapped files, for example, and eliminates big garbage collection
58
+ * delays.
59
+ *
60
+ * author Jon Malkin
61
+ * author Lee Rhodes
62
+ * author Kevin Lang
63
+ */
64
+
65
+
66
+ /**
67
+ * Specifies the target type of HLL sketch to be created. It is a target in that the actual
68
+ * allocation of the HLL array is deferred until a sufficient number of items have been received by
69
+ * the warm-up phases.
70
+ *
71
+ * <p>These three target types are isomorphic representations of the same underlying HLL algorithm.
72
+ * Thus, given the same value of <i>lg_config_k</i> and the same input, all three HLL target types
73
+ * will produce identical estimates and have identical error distributions.</p>
74
+ *
75
+ * <p>The memory (and also the serialization) of the sketch during this early warmup phase starts
76
+ * out very small (8 bytes, when empty) and then grows in increments of 4 bytes as required
77
+ * until the full HLL array is allocated. This transition point occurs at about 10% of K for
78
+ * sketches where lg_config_k is &gt; 8.</p>
79
+ *
80
+ * <ul>
81
+ * <li><b>HLL_8</b> This uses an 8-bit byte per HLL bucket. It is generally the
82
+ * fastest in terms of update time, but has the largest storage footprint of about
83
+ * <i>K</i> bytes.</li>
84
+ *
85
+ * <li><b>HLL_6</b> This uses a 6-bit field per HLL bucket. It is generally the next fastest
86
+ * in terms of update time with a storage footprint of about <i>3/4 * K</i> bytes.</li>
87
+ *
88
+ * <li><b>HLL_4</b> This uses a 4-bit field per HLL bucket and for large counts may require
89
+ * the use of a small internal auxiliary array for storing statistical exceptions, which are rare.
90
+ * For the values of <i>lg_config_k &gt; 13</i> (<i>K</i> = 8192),
91
+ * this additional array adds about 3% to the overall storage. It is generally the slowest in
92
+ * terms of update time, but has the smallest storage footprint of about
93
+ * <i>K/2 * 1.03</i> bytes.</li>
94
+ * </ul>
95
+ */
96
+ enum target_hll_type { // unscoped enum: enumerators are visible directly in namespace datasketches, with implicit values 0, 1, 2
97
+ HLL_4, ///< 4 bits per entry (most compact, size may vary)
98
+ HLL_6, ///< 6 bits per entry (fixed size)
99
+ HLL_8 ///< 8 bits per entry (fastest, fixed size)
100
+ };
101
+
102
+ template<typename A>
103
+ class HllSketchImpl; // forward declaration only; the definition is not part of this section of the header
104
+
105
+ template<typename A>
106
+ class hll_union_alloc; // forward declaration only; the definition is not part of this section of the header
107
+
108
+ template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>; // allocator type A rebound to allocate uint8_t
109
+ template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>; // std::vector of uint8_t that allocates through AllocU8<A>
110
+
111
+ template<typename A = std::allocator<char> >
112
+ class hll_sketch_alloc final {
113
+ public:
114
+ /**
115
+ * Constructs a new HLL sketch.
116
+ * @param lg_config_k Sketch can hold 2^lg_config_k rows
117
+ * @param tgt_type The HLL mode to use, if/when the sketch reaches that state
118
+ * @param start_full_size Indicates whether to start in HLL mode,
119
+ * keeping memory use constant (if HLL_6 or HLL_8) at the cost of
120
+ * starting out using much more memory
121
+ */
122
+ explicit hll_sketch_alloc(int lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false);
123
+
124
+ /**
125
+ * Copy constructor
126
+ */
127
+ hll_sketch_alloc(const hll_sketch_alloc<A>& that);
128
+
129
+ /**
130
+ * Copy constructor to a new target type
131
+ */
132
+ hll_sketch_alloc(const hll_sketch_alloc<A>& that, target_hll_type tgt_type);
133
+
134
+ /**
135
+ * Move constructor
136
+ */
137
+ hll_sketch_alloc(hll_sketch_alloc<A>&& that) noexcept;
138
+
139
+ /**
140
+ * Reconstructs a sketch from a serialized image on a stream.
141
+ * @param is An input stream with a binary image of a sketch
142
+ */
143
+ static hll_sketch_alloc deserialize(std::istream& is);
144
+
145
+ /**
146
+ * Reconstructs a sketch from a serialized image in a byte array.
147
+ * @param bytes An input array with a binary image of a sketch
148
+ * @param len Length of the input array, in bytes
149
+ */
150
+ static hll_sketch_alloc deserialize(const void* bytes, size_t len);
151
+
152
+ //! Class destructor
153
+ virtual ~hll_sketch_alloc();
154
+
155
+ //! Copy assignment operator
156
+ hll_sketch_alloc operator=(const hll_sketch_alloc<A>& other);
157
+
158
+ //! Move assignment operator
159
+ hll_sketch_alloc operator=(hll_sketch_alloc<A>&& other);
160
+
161
+ /**
162
+ * Resets the sketch to an empty state in coupon collection mode.
163
+ * Does not re-use existing internal objects.
164
+ */
165
+ void reset();
166
+
167
+ typedef vector_u8<A> vector_bytes; // alias for users
168
+
169
+ /**
170
+ * Serializes the sketch to a byte array, compacting data structures
171
+ * where feasible to eliminate unused storage in the serialized image.
172
+ * @param header_size_bytes Allows for PostgreSQL integration
173
+ */
174
+ vector_bytes serialize_compact(unsigned header_size_bytes = 0) const;
175
+
176
+ /**
177
+ * Serializes the sketch to a byte array, retaining all internal
178
+ * data structures in their current form.
179
+ */
180
+ vector_bytes serialize_updatable() const;
181
+
182
+ /**
183
+ * Serializes the sketch to an ostream, compacting data structures
184
+ * where feasible to eliminate unused storage in the serialized image.
185
+ * @param os std::ostream to use for output.
186
+ */
187
+ void serialize_compact(std::ostream& os) const;
188
+
189
+ /**
190
+ * Serializes the sketch to an ostream, retaining all internal data
191
+ * structures in their current form.
192
+ * @param os std::ostream to use for output.
193
+ */
194
+ void serialize_updatable(std::ostream& os) const;
195
+
196
+ /**
197
+ * Human readable summary with optional detail
198
+ * @param summary if true, output the sketch summary
199
+ * @param detail if true, output the internal data array
200
+ * @param aux_detail if true, output the internal Aux array, if it exists.
201
+ * @param all if true, outputs all entries including empty ones
202
+ * @return human readable string with optional detail.
203
+ */
204
+ string<A> to_string(bool summary = true,
205
+ bool detail = false,
206
+ bool aux_detail = false,
207
+ bool all = false) const;
208
+
209
+ /**
210
+ * Present the given std::string as a potential unique item.
211
+ * The string is converted to a byte array using UTF8 encoding.
212
+ * If the string is null or empty no update attempt is made and the method returns.
213
+ * @param datum The given string.
214
+ */
215
+ void update(const std::string& datum);
216
+
217
+ /**
218
+ * Present the given unsigned 64-bit integer as a potential unique item.
219
+ * @param datum The given integer.
220
+ */
221
+ void update(uint64_t datum);
222
+
223
+ /**
224
+ * Present the given unsigned 32-bit integer as a potential unique item.
225
+ * @param datum The given integer.
226
+ */
227
+ void update(uint32_t datum);
228
+
229
+ /**
230
+ * Present the given unsigned 16-bit integer as a potential unique item.
231
+ * @param datum The given integer.
232
+ */
233
+ void update(uint16_t datum);
234
+
235
+ /**
236
+ * Present the given unsigned 8-bit integer as a potential unique item.
237
+ * @param datum The given integer.
238
+ */
239
+ void update(uint8_t datum);
240
+
241
+ /**
242
+ * Present the given signed 64-bit integer as a potential unique item.
243
+ * @param datum The given integer.
244
+ */
245
+ void update(int64_t datum);
246
+
247
+ /**
248
+ * Present the given signed 32-bit integer as a potential unique item.
249
+ * @param datum The given integer.
250
+ */
251
+ void update(int32_t datum);
252
+
253
+ /**
254
+ * Present the given signed 16-bit integer as a potential unique item.
255
+ * @param datum The given integer.
256
+ */
257
+ void update(int16_t datum);
258
+
259
+ /**
260
+ * Present the given signed 8-bit integer as a potential unique item.
261
+ * @param datum The given integer.
262
+ */
263
+ void update(int8_t datum);
264
+
265
+ /**
266
+ * Present the given 64-bit floating point value as a potential unique item.
267
+ * @param datum The given double.
268
+ */
269
+ void update(double datum);
270
+
271
+ /**
272
+ * Present the given 32-bit floating point value as a potential unique item.
273
+ * @param datum The given float.
274
+ */
275
+ void update(float datum);
276
+
277
+ /**
278
+ * Present the given data array as a potential unique item.
279
+ * @param data The given array.
280
+ * @param length_bytes The array length in bytes.
281
+ */
282
+ void update(const void* data, size_t length_bytes);
283
+
284
+ /**
285
+ * Returns the current cardinality estimate
286
+ * @return the cardinality estimate
287
+ */
288
+ double get_estimate() const;
289
+
290
+ /**
291
+ * This is less accurate than the get_estimate() method
292
+ * and is automatically used when the sketch has gone through
293
+ * union operations where the more accurate HIP estimator cannot
294
+ * be used.
295
+ *
296
+ * This is made public only for error characterization software
297
+ * that exists in separate packages and is not intended for normal
298
+ * use.
299
+ * @return the composite cardinality estimate
300
+ */
301
+ double get_composite_estimate() const;
302
+
303
+ /**
304
+ * Returns the approximate lower error bound given the specified
305
+ * number of standard deviations.
306
+ * @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
307
+ * @return The approximate lower bound.
308
+ */
309
+ double get_lower_bound(int num_std_dev) const;
310
+
311
+ /**
312
+ * Returns the approximate upper error bound given the specified
313
+ * number of standard deviations.
314
+ * @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
315
+ * @return The approximate upper bound.
316
+ */
317
+ double get_upper_bound(int num_std_dev) const;
318
+
319
+ /**
320
+ * Returns sketch's configured lg_k value.
321
+ * @return Configured lg_k value.
322
+ */
323
+ int get_lg_config_k() const;
324
+
325
+ /**
326
+ * Returns the sketch's target HLL mode (from #target_hll_type).
327
+ * @return The sketch's target HLL mode.
328
+ */
329
+ target_hll_type get_target_type() const;
330
+
331
+ /**
332
+ * Indicates if the sketch is currently stored compacted.
333
+ * @return True if the sketch is stored in compact form.
334
+ */
335
+ bool is_compact() const;
336
+
337
+ /**
338
+ * Indicates if the sketch is currently empty.
339
+ * @return True if the sketch is empty.
340
+ */
341
+ bool is_empty() const;
342
+
343
+ /**
344
+ * Returns the size of the sketch serialized in compact form.
345
+ * @return Size of the sketch serialized in compact form, in bytes.
346
+ */
347
+ int get_compact_serialization_bytes() const;
348
+
349
+ /**
350
+ * Returns the size of the sketch serialized without compaction.
351
+ * @return Size of the sketch serialized without compaction, in bytes.
352
+ */
353
+ int get_updatable_serialization_bytes() const;
354
+
355
+ /**
356
+ * Returns the maximum size in bytes that this sketch can grow to
357
+ * given lg_k. However, for the HLL_4 sketch type, this
358
+ * value can be exceeded in extremely rare cases. If exceeded, it
359
+ * will be larger by only a few percent.
360
+ *
361
+ * @param lg_k The Log2 of K for the target HLL sketch. This value must be
362
+ * between 4 and 21 inclusively.
363
+ * @param tgt_type the desired Hll type
364
+ * @return the maximum size in bytes that this sketch can grow to.
365
+ */
366
+ static int get_max_updatable_serialization_bytes(int lg_k, target_hll_type tgt_type);
367
+
368
+ /**
369
+ * Gets the current (approximate) Relative Error (RE) asymptotic values given several
370
+ * parameters. This is used primarily for testing.
371
+ * @param upper_bound return the RE for the Upper Bound, otherwise for the Lower Bound.
372
+ * @param unioned set true if the sketch is the result of a union operation.
373
+ * @param lg_config_k the configured value for the sketch.
374
+ * @param num_std_dev the given number of Standard Deviations. This must be an integer between
375
+ * 1 and 3, inclusive.
376
+ * @return the current (approximate) RelativeError
377
+ */
378
+ static double get_rel_err(bool upper_bound, bool unioned,
379
+ int lg_config_k, int num_std_dev);
380
+
381
+ private:
382
+ explicit hll_sketch_alloc(HllSketchImpl<A>* that);
383
+
384
+ void coupon_update(int coupon);
385
+
386
+ std::string type_as_string() const;
387
+ std::string mode_as_string() const;
388
+
389
+ hll_mode get_current_mode() const;
390
+ int get_serialization_version() const;
391
+ bool is_out_of_order_flag() const;
392
+ bool is_estimation_mode() const;
393
+
394
+ typedef typename std::allocator_traits<A>::template rebind_alloc<hll_sketch_alloc> AllocHllSketch;
395
+
396
+ HllSketchImpl<A>* sketch_impl;
397
+ friend hll_union_alloc<A>;
398
+ };
399
+
400
+ /**
401
+ * This performs union operations for HLL sketches. This union operator is configured with a
402
+ * <i>lg_max_k</i> instead of the normal <i>lg_config_k</i>.
403
+ *
404
+ * <p>This union operator does permit the unioning of sketches with different values of
405
+ * <i>lg_config_k</i>. The user should be aware that the resulting accuracy of a sketch returned
406
+ * at the end of the unioning process will be a function of the smallest of <i>lg_max_k</i> and
407
+ * <i>lg_config_k</i> that the union operator has seen.
408
+ *
409
+ * <p>This union operator also permits unioning of any of the three different target hll_sketch
410
+ * types.
411
+ *
412
+ * <p>Although the API for this union operator parallels many of the methods of the
413
+ * <i>hll_sketch</i>, the behavior of the union operator has some fundamental differences.
414
+ *
415
+ * <p>First, the user cannot specify the #target_hll_type as an input parameter.
415
+ * Instead, it is specified for the sketch returned with #get_result(target_hll_type).
417
+ *
418
+ * <p>Second, the internal effective value of log-base-2 of <i>k</i> for the union operation can
419
+ * change dynamically based on the smallest <i>lg_config_k</i> that the union operation has seen.
420
+ *
421
+ * author Jon Malkin
422
+ * author Lee Rhodes
423
+ * author Kevin Lang
424
+ */
425
+
426
+ template<typename A = std::allocator<char> >
427
+ class hll_union_alloc {
428
+ public:
429
+ /**
430
+ * Construct an hll_union operator with the given maximum log2 of k.
431
+ * @param lg_max_k The maximum size, in log2, of k. The value must
432
+ * be between 4 and 21, inclusive.
433
+ */
434
+ explicit hll_union_alloc(int lg_max_k);
435
+
436
+ /**
437
+ * Returns the current cardinality estimate
438
+ * @return the cardinality estimate
439
+ */
440
+ double get_estimate() const;
441
+
442
+ /**
443
+ * This is less accurate than the get_estimate() method
444
+ * and is automatically used when the union has gone through
445
+ * union operations where the more accurate HIP estimator cannot
446
+ * be used.
447
+ *
448
+ * This is made public only for error characterization software
449
+ * that exists in separate packages and is not intended for normal
450
+ * use.
451
+ * @return the composite cardinality estimate
452
+ */
453
+ double get_composite_estimate() const;
454
+
455
+ /**
456
+ * Returns the approximate lower error bound given the specified
457
+ * number of standard deviations.
458
+ * @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
459
+ * @return The approximate lower bound.
460
+ */
461
+ double get_lower_bound(int num_std_dev) const;
462
+
463
+ /**
464
+ * Returns the approximate upper error bound given the specified
465
+ * number of standard deviations.
466
+ * @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
467
+ * @return The approximate upper bound.
468
+ */
469
+ double get_upper_bound(int num_std_dev) const;
470
+
471
+ /**
472
+ * Returns the size of the union serialized in compact form.
473
+ * @return Size of the union serialized in compact form, in bytes.
474
+ */
475
+ int get_compact_serialization_bytes() const;
476
+
477
+ /**
478
+ * Returns the size of the union serialized without compaction.
479
+ * @return Size of the union serialized without compaction, in bytes.
480
+ */
481
+ int get_updatable_serialization_bytes() const;
482
+
483
+ /**
484
+ * Returns union's configured lg_k value.
485
+ * @return Configured lg_k value.
486
+ */
487
+ int get_lg_config_k() const;
488
+
489
+ /**
490
+ * Returns the union's target HLL mode (from #target_hll_type).
491
+ * @return The union's target HLL mode.
492
+ */
493
+ target_hll_type get_target_type() const;
494
+
495
+ /**
496
+ * Indicates if the union is currently stored compacted.
497
+ * @return True if the union is stored in compact form.
498
+ */
499
+ bool is_compact() const;
500
+
501
+ /**
502
+ * Indicates if the union is currently empty.
503
+ * @return True if the union is empty.
504
+ */
505
+ bool is_empty() const;
506
+
507
+ /**
508
+ * Resets the union to an empty state in coupon collection mode.
509
+ * Does not re-use existing internal objects.
510
+ */
511
+ void reset();
512
+
513
+ /**
514
+ * Returns the result of this union operator with the specified
515
+ * #target_hll_type.
516
+ * @param tgt_type The target_hll_type enum value of the desired result (Default: HLL_4)
517
+ * @return The result of this union with the specified target_hll_type
518
+ */
519
+ hll_sketch_alloc<A> get_result(target_hll_type tgt_type = HLL_4) const;
520
+
521
+ /**
522
+ * Update this union operator with the given sketch.
523
+ * @param sketch The given sketch.
524
+ */
525
+ void update(const hll_sketch_alloc<A>& sketch);
526
+
527
+ /**
528
+ * Update this union operator with the given temporary sketch.
529
+ * @param sketch The given temporary sketch.
530
+ */
531
+ void update(hll_sketch_alloc<A>&& sketch);
532
+
533
+ /**
534
+ * Present the given std::string as a potential unique item.
535
+ * The string is converted to a byte array using UTF8 encoding.
536
+ * If the string is empty no update attempt is made and the method returns.
537
+ * @param datum The given string.
538
+ */
539
+ void update(const std::string& datum);
540
+
541
+ /**
542
+ * Present the given unsigned 64-bit integer as a potential unique item.
543
+ * @param datum The given integer.
544
+ */
545
+ void update(uint64_t datum);
546
+
547
+ /**
548
+ * Present the given unsigned 32-bit integer as a potential unique item.
549
+ * @param datum The given integer.
550
+ */
551
+ void update(uint32_t datum);
552
+
553
+ /**
554
+ * Present the given unsigned 16-bit integer as a potential unique item.
555
+ * @param datum The given integer.
556
+ */
557
+ void update(uint16_t datum);
558
+
559
+ /**
560
+ * Present the given unsigned 8-bit integer as a potential unique item.
561
+ * @param datum The given integer.
562
+ */
563
+ void update(uint8_t datum);
564
+
565
+ /**
566
+ * Present the given signed 64-bit integer as a potential unique item.
567
+ * @param datum The given integer.
568
+ */
569
+ void update(int64_t datum);
570
+
571
+ /**
572
+ * Present the given signed 32-bit integer as a potential unique item.
573
+ * @param datum The given integer.
574
+ */
575
+ void update(int32_t datum);
576
+
577
+ /**
578
+ * Present the given signed 16-bit integer as a potential unique item.
579
+ * @param datum The given integer.
580
+ */
581
+ void update(int16_t datum);
582
+
583
+ /**
584
+ * Present the given signed 8-bit integer as a potential unique item.
585
+ * @param datum The given integer.
586
+ */
587
+ void update(int8_t datum);
588
+
589
+ /**
590
+ * Present the given 64-bit floating point value as a potential unique item.
591
+ * @param datum The given double.
592
+ */
593
+ void update(double datum);
594
+
595
+ /**
596
+ * Present the given 32-bit floating point value as a potential unique item.
597
+ * @param datum The given float.
598
+ */
599
+ void update(float datum);
600
+
601
+ /**
602
+ * Present the given data array as a potential unique item.
603
+ * @param data The given array.
604
+ * @param length_bytes The array length in bytes.
605
+ */
606
+ void update(const void* data, size_t length_bytes);
607
+
608
+ /**
609
+ * Returns the maximum size in bytes that this union operator can grow to given a lg_k.
610
+ *
611
+ * @param lg_k The maximum Log2 of k for this union operator. This value must be
612
+ * between 4 and 21 inclusively.
613
+ * @return the maximum size in bytes that this union operator can grow to.
614
+ */
615
+ static int get_max_serialization_bytes(int lg_k);
616
+
617
+ /**
618
+ * Gets the current (approximate) Relative Error (RE) asymptotic values given several
619
+ * parameters. This is used primarily for testing.
620
+ * @param upper_bound return the RE for the Upper Bound, otherwise for the Lower Bound.
621
+ * @param unioned set true if the sketch is the result of a union operation.
622
+ * @param lg_config_k the configured value for the sketch.
623
+ * @param num_std_dev the given number of Standard Deviations. This must be an integer between
624
+ * 1 and 3, inclusive.
625
+ * @return the current (approximate) RelativeError
626
+ */
627
+ static double get_rel_err(bool upper_bound, bool unioned,
628
+ int lg_config_k, int num_std_dev);
629
+
630
+ private:
631
+
632
+ /**
633
+ * Union the given source and destination sketches. This method examines the state of
634
+ * the current internal gadget and the incoming sketch and determines the optimal way to
635
+ * perform the union. This may involve swapping, down-sampling, transforming, and / or
636
+ * copying one of the arguments and may completely replace the internals of the union.
637
+ *
638
+ * @param sketch the given incoming sketch, which may not be modified.
639
+ * @param lg_max_k the maximum value of log2 K for this union.
640
+ */
641
+ inline void union_impl(const hll_sketch_alloc<A>& sketch, int lg_max_k);
642
+
643
+ static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl, int tgt_lg_k);
644
+
645
+ void coupon_update(int coupon);
646
+
647
+ hll_mode get_current_mode() const;
648
+ int get_serialization_version() const;
649
+ bool is_out_of_order_flag() const;
650
+ bool is_estimation_mode() const;
651
+
652
+ // calls couponUpdate on sketch, freeing the old sketch upon changes in hll_mode
653
+ static HllSketchImpl<A>* leak_free_coupon_update(HllSketchImpl<A>* impl, int coupon);
654
+
655
+ int lg_max_k;
656
+ hll_sketch_alloc<A> gadget;
657
+ };
658
+
659
+ /// convenience alias for hll_sketch with default allocator
660
+ typedef hll_sketch_alloc<> hll_sketch;
661
+
662
+ /// convenience alias for hll_union with default allocator
663
+ typedef hll_union_alloc<> hll_union;
664
+
665
+ } // namespace datasketches
666
+
667
+ #include "hll.private.hpp"
668
+
669
+ #endif // _HLL_HPP_