datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,124 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLL6ARRAY_INTERNAL_HPP_
21
+ #define _HLL6ARRAY_INTERNAL_HPP_
22
+
23
+ #include <cstring>
24
+
25
+ #include "Hll6Array.hpp"
26
+
27
+ namespace datasketches {
28
+
29
+ template<typename A> // Constructor: sets up the HllArray base as HLL_6, then allocates and zero-fills the packed slot bytes.
30
+ Hll6Array<A>::Hll6Array(const int lgConfigK, const bool startFullSize) : // both arguments are forwarded unchanged to the base
31
+ HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize) {
32
+ const int numBytes = this->hll6ArrBytes(lgConfigK); // size of the byte array, as computed by hll6ArrBytes() from lgConfigK
33
+ typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc; // allocator A rebound to uint8_t
34
+ this->hllByteArr = uint8Alloc().allocate(numBytes); // raw storage from a temporary, default-constructed allocator
35
+ std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0); // allocate() does not initialize, so clear every byte (all slots 0)
36
+ }
37
+
38
+ template<typename A> // Copy constructor: all copying is delegated to the HllArray<A> copy constructor.
39
+ Hll6Array<A>::Hll6Array(const Hll6Array<A>& that) :
40
+ HllArray<A>(that)
41
+ {
42
+ // can determine hllByteArr size in parent class, no need to allocate here
43
+ }
44
+
45
+ template<typename A> // Destructor: intentionally empty; this class releases nothing itself.
46
+ Hll6Array<A>::~Hll6Array() {
47
+ // hllByteArr deleted in parent
48
+ }
49
+
50
+ template<typename A> // Returns a callable that destroys and frees an Hll6Array handed to it as an HllSketchImpl<A>*.
51
+ std::function<void(HllSketchImpl<A>*)> Hll6Array<A>::get_deleter() const {
52
+ return [](HllSketchImpl<A>* ptr) { // capture-less lambda: does not depend on the object get_deleter() was called on
53
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc; // same rebound allocator type as copy()
54
+ Hll6Array<A>* hll = static_cast<Hll6Array<A>*>(ptr); // unchecked downcast: ptr must really point to an Hll6Array<A>
55
+ hll->~Hll6Array(); // run the destructor explicitly; the storage is released separately below
56
+ hll6Alloc().deallocate(hll, 1); // return storage for exactly one object to a default-constructed allocator
57
+ };
58
+ }
59
+
60
+ template<typename A> // Returns a newly allocated copy of this array, built with the copy constructor.
61
+ Hll6Array<A>* Hll6Array<A>::copy() const {
62
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc; // allocator A rebound to Hll6Array<A>
63
+ return new (hll6Alloc().allocate(1)) Hll6Array<A>(*this); // placement-new into allocator storage; get_deleter() performs the matching destroy + deallocate
64
+ }
65
+
66
+ template<typename A> // Reads the value stored in slot slotNo; slots are packed back to back, 6 bits each.
67
+ uint8_t Hll6Array<A>::getSlot(int slotNo) const { // NOTE(review): slotNo is not range-checked here
68
+ const int startBit = slotNo * 6; // bit offset of the slot from the start of hllByteArr
69
+ const int shift = startBit & 0x7; // bit position of the slot inside its first byte
70
+ const int byteIdx = startBit >> 3; // index of the byte holding the slot's first bit
71
+ const uint16_t twoByteVal = (this->hllByteArr[byteIdx + 1] << 8) | this->hllByteArr[byteIdx]; // two adjacent bytes, low byte first; byteIdx + 1 is read unconditionally
72
+ return (twoByteVal >> shift) & HllUtil<A>::VAL_MASK_6; // align the slot to bit 0 and mask it (VAL_MASK_6 presumably 0x3F - confirm in HllUtil)
73
+ }
74
+
75
+ template<typename A> // Stores the low 6 bits of value into slot slotNo with a read-modify-write of two adjacent bytes.
76
+ void Hll6Array<A>::putSlot(int slotNo, uint8_t value) { // NOTE(review): slotNo is not range-checked here
77
+ const int startBit = slotNo * 6; // bit offset of the slot from the start of hllByteArr
78
+ const int shift = startBit & 0x7; // bit position of the slot inside its first byte
79
+ const int byteIdx = startBit >> 3; // index of the byte holding the slot's first bit
80
+ const uint16_t valShifted = (value & 0x3F) << shift; // keep only 6 bits of value, moved to the slot's position
81
+ uint16_t curMasked = (this->hllByteArr[byteIdx + 1] << 8) | this->hllByteArr[byteIdx]; // current two bytes, low byte first
82
+ curMasked &= (~(HllUtil<A>::VAL_MASK_6 << shift)); // clear the slot's bits, leaving neighbouring bits untouched
83
+ const uint16_t insert = curMasked | valShifted; // merge the new slot value into the cleared field
84
+ this->hllByteArr[byteIdx] = insert & 0xFF; // write back the low byte
85
+ this->hllByteArr[byteIdx + 1] = (insert & 0xFF00) >> 8; // write back the high byte (always written, even if unchanged)
86
+ }
87
+
88
+ template<typename A> // Returns the byte length of hllByteArr for this array's lgConfigK.
89
+ int Hll6Array<A>::getHllByteArrBytes() const {
90
+ return this->hll6ArrBytes(this->lgConfigK); // same size computation the constructor uses when allocating
91
+ }
92
+
93
+ template<typename A> // Applies one coupon to this array and returns this same object.
94
+ HllSketchImpl<A>* Hll6Array<A>::couponUpdate(const int coupon) {
95
+ internalCouponUpdate(coupon); // all of the update logic lives in the private helper
96
+ return this; // always returns this; no replacement object is ever created here
97
+ }
98
+
99
+ template<typename A> // Applies one coupon: raises the addressed slot to the coupon's value if that value is larger.
100
+ void Hll6Array<A>::internalCouponUpdate(const int coupon) {
101
+ const int configKmask = (1 << this->lgConfigK) - 1; // low lgConfigK bits set, i.e. (number of slots - 1)
102
+ const int slotNo = HllUtil<A>::getLow26(coupon) & configKmask; // slot index taken from the coupon's getLow26() part
103
+ const int newVal = HllUtil<A>::getValue(coupon); // candidate slot value carried by the coupon
104
+
105
+ const int curVal = getSlot(slotNo);
106
+ if (newVal > curVal) { // slot values only ever increase; otherwise nothing is changed
107
+ putSlot(slotNo, newVal);
108
+ this->hipAndKxQIncrementalUpdate(curVal, newVal); // base-class bookkeeping for the old -> new transition (defined outside this view)
109
+ if (curVal == 0) {
110
+ this->numAtCurMin--; // interpret numAtCurMin as num zeros
111
+ }
112
+ }
113
+ }
114
+
115
+ template<typename A> // Merges src into this array by replaying each coupon that iterating src yields.
116
+ void Hll6Array<A>::mergeHll(const HllArray<A>& src) {
117
+ for (auto coupon: src) { // uses HllArray's begin()/end(), which are defined outside this view
118
+ internalCouponUpdate(coupon); // same per-coupon path as couponUpdate()
119
+ }
120
+ }
121
+
122
+ }
123
+
124
+ #endif // _HLL6ARRAY_INTERNAL_HPP_
@@ -0,0 +1,55 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLL6ARRAY_HPP_
21
+ #define _HLL6ARRAY_HPP_
22
+
23
+ #include "HllArray.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ template<typename A>
28
+ class Hll6Iterator;
29
+
30
+ template<typename A> // A is the allocator type, passed through to HllArray<A>.
31
+ class Hll6Array final : public HllArray<A> { // HllArray specialisation that stores each slot in 6 bits
32
+ public:
33
+ explicit Hll6Array(int lgConfigK, bool startFullSize); // allocates and zero-fills the packed slot array
34
+ explicit Hll6Array(const Hll6Array<A>& that); // NOTE: an explicit copy constructor allows only direct-initialization
35
+
36
+ virtual ~Hll6Array();
37
+ virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const; // callable that destroys and deallocates an Hll6Array
38
+
39
+ virtual Hll6Array* copy() const; // allocator-backed copy of this object
40
+
41
+ inline uint8_t getSlot(int slotNo) const; // read the 6-bit value of one slot
42
+ inline void putSlot(int slotNo, uint8_t value); // write the low 6 bits of value into one slot
43
+
44
+ virtual HllSketchImpl<A>* couponUpdate(int coupon) final; // apply one coupon; returns this
45
+ void mergeHll(const HllArray<A>& src); // replay every coupon of src into this array
46
+
47
+ virtual int getHllByteArrBytes() const; // byte length of the packed slot array
48
+
49
+ private:
50
+ void internalCouponUpdate(int coupon); // shared update path for couponUpdate() and mergeHll()
51
+ };
52
+
53
+ }
54
+
55
+ #endif /* _HLL6ARRAY_HPP_ */
@@ -0,0 +1,158 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLL8ARRAY_INTERNAL_HPP_
21
+ #define _HLL8ARRAY_INTERNAL_HPP_
22
+
23
+ #include "Hll8Array.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ template<typename A> // Constructor: sets up the HllArray base as HLL_8, then allocates and zero-fills the slot bytes.
28
+ Hll8Array<A>::Hll8Array(const int lgConfigK, const bool startFullSize) : // both arguments are forwarded unchanged to the base
29
+ HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize) {
30
+ const int numBytes = this->hll8ArrBytes(lgConfigK); // size of the byte array, as computed by hll8ArrBytes() from lgConfigK
31
+ typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc; // allocator A rebound to uint8_t
32
+ this->hllByteArr = uint8Alloc().allocate(numBytes); // raw storage from a temporary, default-constructed allocator
33
+ std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0); // allocate() does not initialize, so clear every byte (all slots 0)
34
+ }
35
+
36
+ template<typename A> // Copy constructor: all copying is delegated to the HllArray<A> copy constructor.
37
+ Hll8Array<A>::Hll8Array(const Hll8Array<A>& that) :
38
+ HllArray<A>(that)
39
+ {
40
+ // can determine hllByteArr size in parent class, no need to allocate here
41
+ }
42
+
43
+ template<typename A> // Destructor: intentionally empty; this class releases nothing itself.
44
+ Hll8Array<A>::~Hll8Array() {
45
+ // hllByteArr deleted in parent
46
+ }
47
+
48
+ template<typename A> // Returns a callable that destroys and frees an Hll8Array handed to it as an HllSketchImpl<A>*.
49
+ std::function<void(HllSketchImpl<A>*)> Hll8Array<A>::get_deleter() const {
50
+ return [](HllSketchImpl<A>* ptr) { // capture-less lambda: does not depend on the object get_deleter() was called on
51
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc; // same rebound allocator type as copy()
52
+ Hll8Array<A>* hll = static_cast<Hll8Array<A>*>(ptr); // unchecked downcast: ptr must really point to an Hll8Array<A>
53
+ hll->~Hll8Array(); // run the destructor explicitly; the storage is released separately below
54
+ hll8Alloc().deallocate(hll, 1); // return storage for exactly one object to a default-constructed allocator
55
+ };
56
+ }
57
+
58
+ template<typename A> // Returns a newly allocated copy of this array, built with the copy constructor.
59
+ Hll8Array<A>* Hll8Array<A>::copy() const {
60
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc; // allocator A rebound to Hll8Array<A>
61
+ return new (hll8Alloc().allocate(1)) Hll8Array<A>(*this); // placement-new into allocator storage; get_deleter() performs the matching destroy + deallocate
62
+ }
63
+
64
+ template<typename A> // Reads slot slotNo; with one byte per slot the slot index is the byte index.
65
+ uint8_t Hll8Array<A>::getSlot(const int slotNo) const {
66
+ return this->hllByteArr[slotNo]; // NOTE(review): slotNo is not range-checked here
67
+ }
68
+
69
+ template<typename A> // Writes value into slot slotNo; with one byte per slot the slot index is the byte index.
70
+ void Hll8Array<A>::putSlot(const int slotNo, uint8_t value) {
71
+ this->hllByteArr[slotNo] = value; // stored as-is, no masking; NOTE(review): slotNo is not range-checked here
72
+ }
73
+
74
+ template<typename A> // Returns the byte length of hllByteArr for this array's lgConfigK.
75
+ int Hll8Array<A>::getHllByteArrBytes() const {
76
+ return this->hll8ArrBytes(this->lgConfigK); // same size computation the constructor uses when allocating
77
+ }
78
+
79
+ template<typename A> // Applies one coupon to this array and returns this same object.
80
+ HllSketchImpl<A>* Hll8Array<A>::couponUpdate(int coupon) {
81
+ internalCouponUpdate(coupon); // all of the update logic lives in the private helper
82
+ return this; // always returns this; no replacement object is ever created here
83
+ }
84
+
85
+ template<typename A> // Applies one coupon: raises the addressed slot to the coupon's value if that value is larger.
86
+ void Hll8Array<A>::internalCouponUpdate(int coupon) {
87
+ const int configKmask = (1 << this->lgConfigK) - 1; // low lgConfigK bits set, i.e. (number of slots - 1)
88
+ const int slotNo = HllUtil<A>::getLow26(coupon) & configKmask; // slot index taken from the coupon's getLow26() part
89
+ const int newVal = HllUtil<A>::getValue(coupon); // candidate slot value carried by the coupon
90
+
91
+ const int curVal = getSlot(slotNo);
92
+ if (newVal > curVal) { // slot values only ever increase; otherwise nothing is changed
93
+ putSlot(slotNo, newVal);
94
+ this->hipAndKxQIncrementalUpdate(curVal, newVal); // base-class bookkeeping for the old -> new transition (defined outside this view)
95
+ if (curVal == 0) {
96
+ this->numAtCurMin--; // interpret numAtCurMin as num zeros
97
+ }
98
+ }
99
+ }
100
+
101
+ template<typename A> // Merges a coupon list into this array by replaying each coupon that iterating src yields.
102
+ void Hll8Array<A>::mergeList(const CouponList<A>& src) {
103
+ for (auto coupon: src) { // uses CouponList's begin()/end(), which are defined outside this view
104
+ internalCouponUpdate(coupon); // same per-coupon path as couponUpdate()
105
+ }
106
+ }
107
+
108
+ template<typename A> // Merges another HLL array into this one slot by slot, keeping the larger value for each destination slot.
109
+ void Hll8Array<A>::mergeHll(const HllArray<A>& src) { // src may be HLL_8, HLL_6 or HLL_4; the branch is chosen by getTgtHllType()
110
+ // at this point src_k >= dst_k
111
+ const int src_k = 1 << src.getLgConfigK(); // number of slots in the source
112
+ const int dst_mask = (1 << this->getLgConfigK()) - 1; // folds a source slot index onto a destination slot index
113
+ // duplication below is to avoid a virtual method call in a loop
114
+ if (src.getTgtHllType() == target_hll_type::HLL_8) {
115
+ for (int i = 0; i < src_k; i++) {
116
+ const uint8_t new_v = static_cast<const Hll8Array<A>&>(src).getSlot(i); // downcast is justified by the type tag checked above
117
+ const int j = i & dst_mask; // with src_k > dst_k several source slots map to the same j
118
+ const uint8_t old_v = this->hllByteArr[j]; // direct byte access: this array stores one byte per slot
119
+ if (new_v > old_v) {
120
+ this->hllByteArr[j] = new_v;
121
+ this->hipAndKxQIncrementalUpdate(old_v, new_v); // base-class bookkeeping (defined outside this view)
122
+ if (old_v == 0) {
123
+ this->numAtCurMin--; // one fewer zero-valued slot, as in internalCouponUpdate()
124
+ }
125
+ }
126
+ }
127
+ } else if (src.getTgtHllType() == target_hll_type::HLL_6) {
128
+ for (int i = 0; i < src_k; i++) { // same loop as above; only the source accessor differs
129
+ const uint8_t new_v = static_cast<const Hll6Array<A>&>(src).getSlot(i); // unpacks the 6-bit source slot
130
+ const int j = i & dst_mask;
131
+ const uint8_t old_v = this->hllByteArr[j];
132
+ if (new_v > old_v) {
133
+ this->hllByteArr[j] = new_v;
134
+ this->hipAndKxQIncrementalUpdate(old_v, new_v);
135
+ if (old_v == 0) {
136
+ this->numAtCurMin--;
137
+ }
138
+ }
139
+ }
140
+ } else { // HLL_4
141
+ for (int i = 0; i < src_k; i++) { // same loop as above; only the source accessor differs
142
+ const uint8_t new_v = static_cast<const Hll4Array<A>&>(src).get_value(i); // Hll4Array::get_value() is declared outside this view
143
+ const int j = i & dst_mask;
144
+ const uint8_t old_v = this->hllByteArr[j];
145
+ if (new_v > old_v) {
146
+ this->hllByteArr[j] = new_v;
147
+ this->hipAndKxQIncrementalUpdate(old_v, new_v);
148
+ if (old_v == 0) {
149
+ this->numAtCurMin--;
150
+ }
151
+ }
152
+ }
153
+ }
154
+ }
155
+
156
+ }
157
+
158
+ #endif // _HLL8ARRAY_INTERNAL_HPP_
@@ -0,0 +1,56 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLL8ARRAY_HPP_
21
+ #define _HLL8ARRAY_HPP_
22
+
23
+ #include "HllArray.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ template<typename A>
28
+ class Hll8Iterator;
29
+
30
+ template<typename A> // A is the allocator type, passed through to HllArray<A>.
31
+ class Hll8Array final : public HllArray<A> { // HllArray specialisation that stores each slot in one full byte
32
+ public:
33
+ explicit Hll8Array(int lgConfigK, bool startFullSize); // allocates and zero-fills the slot array
34
+ explicit Hll8Array(const Hll8Array& that); // NOTE: an explicit copy constructor allows only direct-initialization
35
+
36
+ virtual ~Hll8Array();
37
+ virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const; // callable that destroys and deallocates an Hll8Array
38
+
39
+ virtual Hll8Array<A>* copy() const; // allocator-backed copy of this object
40
+
41
+ inline uint8_t getSlot(int slotNo) const; // read one slot (one byte)
42
+ inline void putSlot(int slotNo, uint8_t value); // write one slot (one byte)
43
+
44
+ virtual HllSketchImpl<A>* couponUpdate(int coupon) final; // apply one coupon; returns this
45
+ void mergeList(const CouponList<A>& src); // replay every coupon of a CouponList into this array
46
+ void mergeHll(const HllArray<A>& src); // slot-wise max merge from an HLL_4/6/8 array
47
+
48
+ virtual int getHllByteArrBytes() const; // byte length of the slot array
49
+
50
+ private:
51
+ inline void internalCouponUpdate(int coupon); // shared update path for couponUpdate() and mergeList()
52
+ };
53
+
54
+ }
55
+
56
+ #endif /* _HLL8ARRAY_HPP_ */
@@ -0,0 +1,706 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLLARRAY_INTERNAL_HPP_
21
+ #define _HLLARRAY_INTERNAL_HPP_
22
+
23
+ #include "HllArray.hpp"
24
+ #include "HllUtil.hpp"
25
+ #include "HarmonicNumbers.hpp"
26
+ #include "CubicInterpolation.hpp"
27
+ #include "CompositeInterpolationXTable.hpp"
28
+ #include "CouponList.hpp"
29
+ #include "inv_pow2_table.hpp"
30
+ #include <cstring>
31
+ #include <cmath>
32
+ #include <stdexcept>
33
+ #include <string>
34
+
35
+ namespace datasketches {
36
+
37
+ template<typename A>
38
+ HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, bool startFullSize)
39
+ : HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize) {
40
+ hipAccum = 0.0;
41
+ kxq0 = 1 << lgConfigK;
42
+ kxq1 = 0.0;
43
+ curMin = 0;
44
+ numAtCurMin = 1 << lgConfigK;
45
+ oooFlag = false;
46
+ hllByteArr = nullptr; // allocated in derived class
47
+ }
48
+
49
+ template<typename A>
50
+ HllArray<A>::HllArray(const HllArray<A>& that):
51
+ HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, hll_mode::HLL, that.startFullSize),
52
+ hipAccum(that.hipAccum),
53
+ kxq0(that.kxq0),
54
+ kxq1(that.kxq1),
55
+ hllByteArr(nullptr),
56
+ curMin(that.curMin),
57
+ numAtCurMin(that.numAtCurMin),
58
+ oooFlag(that.oooFlag)
59
+ {
60
+ const int arrayLen = that.getHllByteArrBytes();
61
+ typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
62
+ hllByteArr = uint8Alloc().allocate(arrayLen);
63
+ std::copy(that.hllByteArr, that.hllByteArr + arrayLen, hllByteArr);
64
+ }
65
+
66
+ template<typename A>
67
+ HllArray<A>::~HllArray() {
68
+ // need to determine number of bytes to deallocate
69
+ int hllArrBytes = 0;
70
+ if (this->tgtHllType == target_hll_type::HLL_4) {
71
+ hllArrBytes = hll4ArrBytes(this->lgConfigK);
72
+ } else if (this->tgtHllType == target_hll_type::HLL_6) {
73
+ hllArrBytes = hll6ArrBytes(this->lgConfigK);
74
+ } else { // tgtHllType == HLL_8
75
+ hllArrBytes = hll8ArrBytes(this->lgConfigK);
76
+ }
77
+ typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
78
+ uint8Alloc().deallocate(hllByteArr, hllArrBytes);
79
+ }
80
+
81
+ template<typename A>
82
+ HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
83
+ if (tgtHllType == this->getTgtHllType()) {
84
+ return static_cast<HllArray*>(copy());
85
+ }
86
+ if (tgtHllType == target_hll_type::HLL_4) {
87
+ return HllSketchImplFactory<A>::convertToHll4(*this);
88
+ } else if (tgtHllType == target_hll_type::HLL_6) {
89
+ return HllSketchImplFactory<A>::convertToHll6(*this);
90
+ } else { // tgtHllType == HLL_8
91
+ return HllSketchImplFactory<A>::convertToHll8(*this);
92
+ }
93
+ }
94
+
95
+ template<typename A>
96
+ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
97
+ if (len < HllUtil<A>::HLL_BYTE_ARR_START) {
98
+ throw std::out_of_range("Input data length insufficient to hold HLL array");
99
+ }
100
+
101
+ const uint8_t* data = static_cast<const uint8_t*>(bytes);
102
+ if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HLL_PREINTS) {
103
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
104
+ }
105
+ if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
106
+ throw std::invalid_argument("Wrong ser ver in input stream");
107
+ }
108
+ if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
109
+ throw std::invalid_argument("Input array is not an HLL sketch");
110
+ }
111
+
112
+ const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
113
+ if (mode != HLL) {
114
+ throw std::invalid_argument("Calling HLL array construtor with non-HLL mode data");
115
+ }
116
+
117
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
118
+ const bool oooFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
119
+ const bool comapctFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
120
+ const bool startFullSizeFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::FULL_SIZE_FLAG_MASK) ? true : false);
121
+
122
+ const int lgK = (int) data[HllUtil<A>::LG_K_BYTE];
123
+ const int curMin = (int) data[HllUtil<A>::HLL_CUR_MIN_BYTE];
124
+
125
+ const int arrayBytes = hllArrBytes(tgtHllType, lgK);
126
+ if (len < static_cast<size_t>(HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes)) {
127
+ throw std::out_of_range("Input array too small to hold sketch image");
128
+ }
129
+
130
+ double hip, kxq0, kxq1;
131
+ std::memcpy(&hip, data + HllUtil<A>::HIP_ACCUM_DOUBLE, sizeof(double));
132
+ std::memcpy(&kxq0, data + HllUtil<A>::KXQ0_DOUBLE, sizeof(double));
133
+ std::memcpy(&kxq1, data + HllUtil<A>::KXQ1_DOUBLE, sizeof(double));
134
+
135
+ int numAtCurMin, auxCount;
136
+ std::memcpy(&numAtCurMin, data + HllUtil<A>::CUR_MIN_COUNT_INT, sizeof(int));
137
+ std::memcpy(&auxCount, data + HllUtil<A>::AUX_COUNT_INT, sizeof(int));
138
+
139
+ AuxHashMap<A>* auxHashMap = nullptr;
140
+ typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
141
+ aux_hash_map_ptr aux_ptr;
142
+ if (auxCount > 0) { // necessarily TgtHllType == HLL_4
143
+ int auxLgIntArrSize = (int) data[4];
144
+ const size_t offset = HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes;
145
+ const uint8_t* auxDataStart = data + offset;
146
+ auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag);
147
+ aux_ptr = aux_hash_map_ptr(auxHashMap, auxHashMap->make_deleter());
148
+ }
149
+
150
+ HllArray<A>* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
151
+ sketch->putCurMin(curMin);
152
+ sketch->putOutOfOrderFlag(oooFlag);
153
+ if (!oooFlag) sketch->putHipAccum(hip);
154
+ sketch->putKxQ0(kxq0);
155
+ sketch->putKxQ1(kxq1);
156
+ sketch->putNumAtCurMin(numAtCurMin);
157
+
158
+ std::memcpy(sketch->hllByteArr, data + HllUtil<A>::HLL_BYTE_ARR_START, arrayBytes);
159
+
160
+ if (auxHashMap != nullptr)
161
+ ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
162
+
163
+ aux_ptr.release();
164
+ return sketch;
165
+ }
166
+
167
+ template<typename A>
168
+ HllArray<A>* HllArray<A>::newHll(std::istream& is) {
169
+ uint8_t listHeader[8];
170
+ is.read((char*)listHeader, 8 * sizeof(uint8_t));
171
+
172
+ if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HLL_PREINTS) {
173
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
174
+ }
175
+ if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
176
+ throw std::invalid_argument("Wrong ser ver in input stream");
177
+ }
178
+ if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
179
+ throw std::invalid_argument("Input stream is not an HLL sketch");
180
+ }
181
+
182
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
183
+ if (mode != HLL) {
184
+ throw std::invalid_argument("Calling HLL construtor with non-HLL mode data");
185
+ }
186
+
187
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
188
+ const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
189
+ const bool comapctFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
190
+ const bool startFullSizeFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::FULL_SIZE_FLAG_MASK) ? true : false);
191
+
192
+ const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE];
193
+ const int curMin = (int) listHeader[HllUtil<A>::HLL_CUR_MIN_BYTE];
194
+
195
+ HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
196
+ typedef std::unique_ptr<HllArray<A>, std::function<void(HllSketchImpl<A>*)>> hll_array_ptr;
197
+ hll_array_ptr sketch_ptr(sketch, sketch->get_deleter());
198
+ sketch->putCurMin(curMin);
199
+ sketch->putOutOfOrderFlag(oooFlag);
200
+
201
+ double hip, kxq0, kxq1;
202
+ is.read((char*)&hip, sizeof(hip));
203
+ is.read((char*)&kxq0, sizeof(kxq0));
204
+ is.read((char*)&kxq1, sizeof(kxq1));
205
+ if (!oooFlag) sketch->putHipAccum(hip);
206
+ sketch->putKxQ0(kxq0);
207
+ sketch->putKxQ1(kxq1);
208
+
209
+ int numAtCurMin, auxCount;
210
+ is.read((char*)&numAtCurMin, sizeof(numAtCurMin));
211
+ is.read((char*)&auxCount, sizeof(auxCount));
212
+ sketch->putNumAtCurMin(numAtCurMin);
213
+
214
+ is.read((char*)sketch->hllByteArr, sketch->getHllByteArrBytes());
215
+
216
+ if (auxCount > 0) { // necessarily TgtHllType == HLL_4
217
+ int auxLgIntArrSize = listHeader[4];
218
+ AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag);
219
+ ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
220
+ }
221
+
222
+ if (!is.good())
223
+ throw std::runtime_error("error reading from std::istream");
224
+
225
+ return sketch_ptr.release();
226
+ }
227
+
228
+ template<typename A>
229
+ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) const {
230
+ const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
231
+ vector_u8<A> byteArr(sketchSizeBytes);
232
+ uint8_t* bytes = byteArr.data() + header_size_bytes;
233
+ AuxHashMap<A>* auxHashMap = getAuxHashMap();
234
+
235
+ bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
236
+ bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
237
+ bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
238
+ bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
239
+ bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(auxHashMap == nullptr ? 0 : auxHashMap->getLgAuxArrInts());
240
+ bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
241
+ bytes[HllUtil<A>::HLL_CUR_MIN_BYTE] = static_cast<uint8_t>(curMin);
242
+ bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
243
+
244
+ std::memcpy(bytes + HllUtil<A>::HIP_ACCUM_DOUBLE, &hipAccum, sizeof(double));
245
+ std::memcpy(bytes + HllUtil<A>::KXQ0_DOUBLE, &kxq0, sizeof(double));
246
+ std::memcpy(bytes + HllUtil<A>::KXQ1_DOUBLE, &kxq1, sizeof(double));
247
+ std::memcpy(bytes + HllUtil<A>::CUR_MIN_COUNT_INT, &numAtCurMin, sizeof(int));
248
+ const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
249
+ std::memcpy(bytes + HllUtil<A>::AUX_COUNT_INT, &auxCount, sizeof(int));
250
+
251
+ const int hllByteArrBytes = getHllByteArrBytes();
252
+ std::memcpy(bytes + getMemDataStart(), hllByteArr, hllByteArrBytes);
253
+
254
+ // aux map if HLL_4
255
+ if (this->tgtHllType == HLL_4) {
256
+ bytes += getMemDataStart() + hllByteArrBytes; // start of auxHashMap
257
+ if (auxHashMap != nullptr) {
258
+ if (compact) {
259
+ for (uint32_t coupon: *auxHashMap) {
260
+ std::memcpy(bytes, &coupon, sizeof(coupon));
261
+ bytes += sizeof(coupon);
262
+ }
263
+ } else {
264
+ std::memcpy(bytes, auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
265
+ }
266
+ } else if (!compact) {
267
+ // if updatable, we write even if currently unused so the binary can be wrapped
268
+ int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK];
269
+ std::fill_n(bytes, auxBytes, 0);
270
+ }
271
+ }
272
+
273
+ return byteArr;
274
+ }
275
+
276
+ template<typename A>
277
+ void HllArray<A>::serialize(std::ostream& os, const bool compact) const {
278
+ // header
279
+ const uint8_t preInts(getPreInts());
280
+ os.write((char*)&preInts, sizeof(preInts));
281
+ const uint8_t serialVersion(HllUtil<A>::SER_VER);
282
+ os.write((char*)&serialVersion, sizeof(serialVersion));
283
+ const uint8_t familyId(HllUtil<A>::FAMILY_ID);
284
+ os.write((char*)&familyId, sizeof(familyId));
285
+ const uint8_t lgKByte((uint8_t) this->lgConfigK);
286
+ os.write((char*)&lgKByte, sizeof(lgKByte));
287
+
288
+ AuxHashMap<A>* auxHashMap = getAuxHashMap();
289
+ uint8_t lgArrByte(0);
290
+ if (auxHashMap != nullptr) {
291
+ lgArrByte = auxHashMap->getLgAuxArrInts();
292
+ }
293
+ os.write((char*)&lgArrByte, sizeof(lgArrByte));
294
+
295
+ const uint8_t flagsByte(this->makeFlagsByte(compact));
296
+ os.write((char*)&flagsByte, sizeof(flagsByte));
297
+ const uint8_t curMinByte((uint8_t) curMin);
298
+ os.write((char*)&curMinByte, sizeof(curMinByte));
299
+ const uint8_t modeByte(this->makeModeByte());
300
+ os.write((char*)&modeByte, sizeof(modeByte));
301
+
302
+ // estimator data
303
+ os.write((char*)&hipAccum, sizeof(hipAccum));
304
+ os.write((char*)&kxq0, sizeof(kxq0));
305
+ os.write((char*)&kxq1, sizeof(kxq1));
306
+
307
+ // array data
308
+ os.write((char*)&numAtCurMin, sizeof(numAtCurMin));
309
+
310
+ const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
311
+ os.write((char*)&auxCount, sizeof(auxCount));
312
+ os.write((char*)hllByteArr, getHllByteArrBytes());
313
+
314
+ // aux map if HLL_4
315
+ if (this->tgtHllType == HLL_4) {
316
+ if (auxHashMap != nullptr) {
317
+ if (compact) {
318
+ for (uint32_t coupon: *auxHashMap) {
319
+ os.write((char*)&coupon, sizeof(coupon));
320
+ }
321
+ } else {
322
+ os.write((char*)auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
323
+ }
324
+ } else if (!compact) {
325
+ // if updatable, we write even if currently unused so the binary can be wrapped
326
+ int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK];
327
+ std::fill_n(std::ostreambuf_iterator<char>(os), auxBytes, 0);
328
+ }
329
+ }
330
+ }
331
+
332
+ template<typename A>
333
+ double HllArray<A>::getEstimate() const {
334
+ if (oooFlag) {
335
+ return getCompositeEstimate();
336
+ }
337
+ return getHipAccum();
338
+ }
339
+
340
+ // HLL UPPER AND LOWER BOUNDS
341
+
342
+ /*
343
+ * The upper and lower bounds are not symmetric and thus are treated slightly differently.
344
+ * For the lower bound, when the unique count is <= k, LB >= numNonZeros, where
345
+ * numNonZeros = k - numAtCurMin AND curMin == 0.
346
+ *
347
+ * For HLL6 and HLL8, curMin is always 0 and numAtCurMin is initialized to k and is decremented
348
+ * down for each valid update until it reaches 0, where it stays. Thus, for these two
349
+ * isomorphs, numAtCurMin = 0 means that the true curMin is > 0 and the unique count must be
350
+ * greater than k.
351
+ *
352
+ * HLL4 always maintains both curMin and numAtCurMin dynamically. Nonetheless, the rules for
353
+ * the very small values <= k where curMin = 0 still apply.
354
+ */
355
+ template<typename A>
356
+ double HllArray<A>::getLowerBound(const int numStdDev) const {
357
+ HllUtil<A>::checkNumStdDev(numStdDev);
358
+ const int configK = 1 << this->lgConfigK;
359
+ const double numNonZeros = ((curMin == 0) ? (configK - numAtCurMin) : configK);
360
+
361
+ double estimate;
362
+ double rseFactor;
363
+ if (oooFlag) {
364
+ estimate = getCompositeEstimate();
365
+ rseFactor = HllUtil<A>::HLL_NON_HIP_RSE_FACTOR;
366
+ } else {
367
+ estimate = hipAccum;
368
+ rseFactor = HllUtil<A>::HLL_HIP_RSE_FACTOR;
369
+ }
370
+
371
+ double relErr;
372
+ if (this->lgConfigK > 12) {
373
+ relErr = (numStdDev * rseFactor) / sqrt(configK);
374
+ } else {
375
+ relErr = HllUtil<A>::getRelErr(false, oooFlag, this->lgConfigK, numStdDev);
376
+ }
377
+ return fmax(estimate / (1.0 + relErr), numNonZeros);
378
+ }
379
+
380
+ template<typename A>
381
+ double HllArray<A>::getUpperBound(const int numStdDev) const {
382
+ HllUtil<A>::checkNumStdDev(numStdDev);
383
+ const int configK = 1 << this->lgConfigK;
384
+
385
+ double estimate;
386
+ double rseFactor;
387
+ if (oooFlag) {
388
+ estimate = getCompositeEstimate();
389
+ rseFactor = HllUtil<A>::HLL_NON_HIP_RSE_FACTOR;
390
+ } else {
391
+ estimate = hipAccum;
392
+ rseFactor = HllUtil<A>::HLL_HIP_RSE_FACTOR;
393
+ }
394
+
395
+ double relErr;
396
+ if (this->lgConfigK > 12) {
397
+ relErr = (-1.0) * (numStdDev * rseFactor) / sqrt(configK);
398
+ } else {
399
+ relErr = HllUtil<A>::getRelErr(true, oooFlag, this->lgConfigK, numStdDev);
400
+ }
401
+ return estimate / (1.0 + relErr);
402
+ }
403
+
404
+ /**
405
+ * This is the (non-HIP) estimator.
406
+ * It is called "composite" because multiple estimators are pasted together.
407
+ * @param absHllArr an instance of the AbstractHllArray class.
408
+ * @return the composite estimate
409
+ */
410
+ // Original C: again-two-registers.c hhb_get_composite_estimate L1489
411
+ template<typename A>
412
+ double HllArray<A>::getCompositeEstimate() const {
413
+ const double rawEst = getHllRawEstimate(this->lgConfigK, kxq0 + kxq1);
414
+
415
+ const double* xArr = CompositeInterpolationXTable<A>::get_x_arr(this->lgConfigK);
416
+ const int xArrLen = CompositeInterpolationXTable<A>::get_x_arr_length();
417
+ const double yStride = CompositeInterpolationXTable<A>::get_y_stride(this->lgConfigK);
418
+
419
+ if (rawEst < xArr[0]) {
420
+ return 0;
421
+ }
422
+
423
+ const int xArrLenM1 = xArrLen - 1;
424
+
425
+ if (rawEst > xArr[xArrLenM1]) {
426
+ double finalY = yStride * xArrLenM1;
427
+ double factor = finalY / xArr[xArrLenM1];
428
+ return rawEst * factor;
429
+ }
430
+
431
+ double adjEst = CubicInterpolation<A>::usingXArrAndYStride(xArr, xArrLen, yStride, rawEst);
432
+
433
+ // We need to completely avoid the linear_counting estimator if it might have a crazy value.
434
+ // Empirical evidence suggests that the threshold 3*k will keep us safe if 2^4 <= k <= 2^21.
435
+
436
+ if (adjEst > (3 << this->lgConfigK)) { return adjEst; }
437
+
438
+ const double linEst =
439
+ getHllBitMapEstimate(this->lgConfigK, curMin, numAtCurMin);
440
+
441
+ // Bias is created when the value of an estimator is compared with a threshold to decide whether
442
+ // to use that estimator or a different one.
443
+ // We conjecture that less bias is created when the average of the two estimators
444
+ // is compared with the threshold. Empirical measurements support this conjecture.
445
+
446
+ const double avgEst = (adjEst + linEst) / 2.0;
447
+
448
+ // The following constants comes from empirical measurements of the crossover point
449
+ // between the average error of the linear estimator and the adjusted hll estimator
450
+ double crossOver = 0.64;
451
+ if (this->lgConfigK == 4) { crossOver = 0.718; }
452
+ else if (this->lgConfigK == 5) { crossOver = 0.672; }
453
+
454
+ return (avgEst > (crossOver * (1 << this->lgConfigK))) ? adjEst : linEst;
455
+ }
456
+
457
+ template<typename A>
458
+ double HllArray<A>::getKxQ0() const {
459
+ return kxq0;
460
+ }
461
+
462
+ template<typename A>
463
+ double HllArray<A>::getKxQ1() const {
464
+ return kxq1;
465
+ }
466
+
467
+ template<typename A>
468
+ double HllArray<A>::getHipAccum() const {
469
+ return hipAccum;
470
+ }
471
+
472
+ template<typename A>
473
+ int HllArray<A>::getCurMin() const {
474
+ return curMin;
475
+ }
476
+
477
+ template<typename A>
478
+ int HllArray<A>::getNumAtCurMin() const {
479
+ return numAtCurMin;
480
+ }
481
+
482
+ template<typename A>
483
+ void HllArray<A>::putKxQ0(const double kxq0) {
484
+ this->kxq0 = kxq0;
485
+ }
486
+
487
+ template<typename A>
488
+ void HllArray<A>::putKxQ1(const double kxq1) {
489
+ this->kxq1 = kxq1;
490
+ }
491
+
492
+ template<typename A>
493
+ void HllArray<A>::putHipAccum(const double hipAccum) {
494
+ this->hipAccum = hipAccum;
495
+ }
496
+
497
+ template<typename A>
498
+ void HllArray<A>::putCurMin(const int curMin) {
499
+ this->curMin = curMin;
500
+ }
501
+
502
+ template<typename A>
503
+ void HllArray<A>::putNumAtCurMin(const int numAtCurMin) {
504
+ this->numAtCurMin = numAtCurMin;
505
+ }
506
+
507
+ template<typename A>
508
+ void HllArray<A>::decNumAtCurMin() {
509
+ --numAtCurMin;
510
+ }
511
+
512
+ template<typename A>
513
+ void HllArray<A>::addToHipAccum(const double delta) {
514
+ hipAccum += delta;
515
+ }
516
+
517
+ template<typename A>
518
+ bool HllArray<A>::isCompact() const {
519
+ return false;
520
+ }
521
+
522
+ template<typename A>
523
+ bool HllArray<A>::isEmpty() const {
524
+ const int configK = 1 << this->lgConfigK;
525
+ return (getCurMin() == 0) && (getNumAtCurMin() == configK);
526
+ }
527
+
528
+ template<typename A>
529
+ void HllArray<A>::putOutOfOrderFlag(bool flag) {
530
+ oooFlag = flag;
531
+ }
532
+
533
+ template<typename A>
534
+ bool HllArray<A>::isOutOfOrderFlag() const {
535
+ return oooFlag;
536
+ }
537
+
538
+ template<typename A>
539
+ int HllArray<A>::hllArrBytes(target_hll_type tgtHllType, int lgConfigK) {
540
+ switch (tgtHllType) {
541
+ case HLL_4:
542
+ return hll4ArrBytes(lgConfigK);
543
+ case HLL_6:
544
+ return hll6ArrBytes(lgConfigK);
545
+ case HLL_8:
546
+ return hll8ArrBytes(lgConfigK);
547
+ default:
548
+ throw std::invalid_argument("Invalid target HLL type");
549
+ }
550
+ }
551
+
552
+ template<typename A>
553
+ int HllArray<A>::hll4ArrBytes(const int lgConfigK) {
554
+ return 1 << (lgConfigK - 1);
555
+ }
556
+
557
+ template<typename A>
558
+ int HllArray<A>::hll6ArrBytes(const int lgConfigK) {
559
+ const int numSlots = 1 << lgConfigK;
560
+ return ((numSlots * 3) >> 2) + 1;
561
+ }
562
+
563
+ template<typename A>
564
+ int HllArray<A>::hll8ArrBytes(const int lgConfigK) {
565
+ return 1 << lgConfigK;
566
+ }
567
+
568
+ template<typename A>
569
+ int HllArray<A>::getMemDataStart() const {
570
+ return HllUtil<A>::HLL_BYTE_ARR_START;
571
+ }
572
+
573
+ template<typename A>
574
+ int HllArray<A>::getUpdatableSerializationBytes() const {
575
+ return HllUtil<A>::HLL_BYTE_ARR_START + getHllByteArrBytes();
576
+ }
577
+
578
+ template<typename A>
579
+ int HllArray<A>::getCompactSerializationBytes() const {
580
+ AuxHashMap<A>* auxHashMap = getAuxHashMap();
581
+ const int auxCountBytes = ((auxHashMap == nullptr) ? 0 : auxHashMap->getCompactSizeBytes());
582
+ return HllUtil<A>::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxCountBytes;
583
+ }
584
+
585
+ template<typename A>
586
+ int HllArray<A>::getPreInts() const {
587
+ return HllUtil<A>::HLL_PREINTS;
588
+ }
589
+
590
+ template<typename A>
591
+ AuxHashMap<A>* HllArray<A>::getAuxHashMap() const {
592
+ return nullptr;
593
+ }
594
+
595
+ template<typename A>
596
+ void HllArray<A>::hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue) {
597
+ const int configK = 1 << this->getLgConfigK();
598
+ // update hip BEFORE updating kxq
599
+ if (!oooFlag) hipAccum += configK / (kxq0 + kxq1);
600
+ // update kxq0 and kxq1; subtract first, then add
601
+ if (oldValue < 32) { kxq0 -= INVERSE_POWERS_OF_2[oldValue]; }
602
+ else { kxq1 -= INVERSE_POWERS_OF_2[oldValue]; }
603
+ if (newValue < 32) { kxq0 += INVERSE_POWERS_OF_2[newValue]; }
604
+ else { kxq1 += INVERSE_POWERS_OF_2[newValue]; }
605
+ }
606
+
607
+ /**
608
+ * Estimator when N is small, roughly less than k log(k).
609
+ * Refer to Wikipedia: Coupon Collector Problem
610
+ * @return the very low range estimate
611
+ */
612
+ //In C: again-two-registers.c hhb_get_improved_linear_counting_estimate L1274
613
+ template<typename A>
614
+ double HllArray<A>::getHllBitMapEstimate(const int lgConfigK, const int curMin, const int numAtCurMin) const {
615
+ const int configK = 1 << lgConfigK;
616
+ const int numUnhitBuckets = ((curMin == 0) ? numAtCurMin : 0);
617
+
618
+ //This will eventually go away.
619
+ if (numUnhitBuckets == 0) {
620
+ return configK * log(configK / 0.5);
621
+ }
622
+
623
+ const int numHitBuckets = configK - numUnhitBuckets;
624
+ return HarmonicNumbers<A>::getBitMapEstimate(configK, numHitBuckets);
625
+ }
626
+
627
+ //In C: again-two-registers.c hhb_get_raw_estimate L1167
628
+ template<typename A>
629
+ double HllArray<A>::getHllRawEstimate(const int lgConfigK, const double kxqSum) const {
630
+ const int configK = 1 << lgConfigK;
631
+ double correctionFactor;
632
+ if (lgConfigK == 4) { correctionFactor = 0.673; }
633
+ else if (lgConfigK == 5) { correctionFactor = 0.697; }
634
+ else if (lgConfigK == 6) { correctionFactor = 0.709; }
635
+ else { correctionFactor = 0.7213 / (1.0 + (1.079 / configK)); }
636
+ const double hyperEst = (correctionFactor * configK * configK) / kxqSum;
637
+ return hyperEst;
638
+ }
639
+
640
+ template<typename A>
641
+ typename HllArray<A>::const_iterator HllArray<A>::begin(bool all) const {
642
+ return const_iterator(hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all);
643
+ }
644
+
645
+ template<typename A>
646
+ typename HllArray<A>::const_iterator HllArray<A>::end() const {
647
+ return const_iterator(hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false);
648
+ }
649
+
650
+ template<typename A>
651
+ HllArray<A>::const_iterator::const_iterator(const uint8_t* array, size_t array_size, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all):
652
+ array(array), array_size(array_size), index(index), hll_type(hll_type), exceptions(exceptions), offset(offset), all(all)
653
+ {
654
+ while (this->index < array_size) {
655
+ value = get_value(array, this->index, hll_type, exceptions, offset);
656
+ if (all || value != HllUtil<A>::EMPTY) break;
657
+ this->index++;
658
+ }
659
+ }
660
+
661
+ template<typename A>
662
+ typename HllArray<A>::const_iterator& HllArray<A>::const_iterator::operator++() {
663
+ while (++index < array_size) {
664
+ value = get_value(array, index, hll_type, exceptions, offset);
665
+ if (all || value != HllUtil<A>::EMPTY) break;
666
+ }
667
+ return *this;
668
+ }
669
+
670
+ template<typename A>
671
+ bool HllArray<A>::const_iterator::operator!=(const const_iterator& other) const {
672
+ return index != other.index;
673
+ }
674
+
675
+ template<typename A>
676
+ uint32_t HllArray<A>::const_iterator::operator*() const {
677
+ return HllUtil<A>::pair(index, value);
678
+ }
679
+
680
+ template<typename A>
681
+ uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset) {
682
+ if (hll_type == target_hll_type::HLL_4) {
683
+ uint8_t value = array[index >> 1];
684
+ if ((index & 1) > 0) { // odd
685
+ value >>= 4;
686
+ } else {
687
+ value &= HllUtil<A>::loNibbleMask;
688
+ }
689
+ if (value == HllUtil<A>::AUX_TOKEN) { // exception
690
+ return exceptions->mustFindValueFor(index);
691
+ }
692
+ return value + offset;
693
+ } else if (hll_type == target_hll_type::HLL_6) {
694
+ const int start_bit = index * 6;
695
+ const int shift = start_bit & 0x7;
696
+ const int byte_idx = start_bit >> 3;
697
+ const uint16_t two_byte_val = (array[byte_idx + 1] << 8) | array[byte_idx];
698
+ return (two_byte_val >> shift) & HllUtil<A>::VAL_MASK_6;
699
+ }
700
+ // HLL_8
701
+ return array[index];
702
+ }
703
+
704
+ }
705
+
706
+ #endif // _HLLARRAY_INTERNAL_HPP_