datasketches 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,124 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLL6ARRAY_INTERNAL_HPP_
21
+ #define _HLL6ARRAY_INTERNAL_HPP_
22
+
23
+ #include <cstring>
24
+
25
+ #include "Hll6Array.hpp"
26
+
27
+ namespace datasketches {
28
+
29
+ template<typename A>
30
+ Hll6Array<A>::Hll6Array(const int lgConfigK, const bool startFullSize) :
31
+ HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize) {
32
+ const int numBytes = this->hll6ArrBytes(lgConfigK);
33
+ typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
34
+ this->hllByteArr = uint8Alloc().allocate(numBytes);
35
+ std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
36
+ }
37
+
38
+ template<typename A>
39
+ Hll6Array<A>::Hll6Array(const Hll6Array<A>& that) :
40
+ HllArray<A>(that)
41
+ {
42
+ // can determine hllByteArr size in parent class, no need to allocate here
43
+ }
44
+
45
+ template<typename A>
46
+ Hll6Array<A>::~Hll6Array() {
47
+ // hllByteArr deleted in parent
48
+ }
49
+
50
+ template<typename A>
51
+ std::function<void(HllSketchImpl<A>*)> Hll6Array<A>::get_deleter() const {
52
+ return [](HllSketchImpl<A>* ptr) {
53
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
54
+ Hll6Array<A>* hll = static_cast<Hll6Array<A>*>(ptr);
55
+ hll->~Hll6Array();
56
+ hll6Alloc().deallocate(hll, 1);
57
+ };
58
+ }
59
+
60
+ template<typename A>
61
+ Hll6Array<A>* Hll6Array<A>::copy() const {
62
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
63
+ return new (hll6Alloc().allocate(1)) Hll6Array<A>(*this);
64
+ }
65
+
66
+ template<typename A>
67
+ uint8_t Hll6Array<A>::getSlot(int slotNo) const {
68
+ const int startBit = slotNo * 6;
69
+ const int shift = startBit & 0x7;
70
+ const int byteIdx = startBit >> 3;
71
+ const uint16_t twoByteVal = (this->hllByteArr[byteIdx + 1] << 8) | this->hllByteArr[byteIdx];
72
+ return (twoByteVal >> shift) & HllUtil<A>::VAL_MASK_6;
73
+ }
74
+
75
+ template<typename A>
76
+ void Hll6Array<A>::putSlot(int slotNo, uint8_t value) {
77
+ const int startBit = slotNo * 6;
78
+ const int shift = startBit & 0x7;
79
+ const int byteIdx = startBit >> 3;
80
+ const uint16_t valShifted = (value & 0x3F) << shift;
81
+ uint16_t curMasked = (this->hllByteArr[byteIdx + 1] << 8) | this->hllByteArr[byteIdx];
82
+ curMasked &= (~(HllUtil<A>::VAL_MASK_6 << shift));
83
+ const uint16_t insert = curMasked | valShifted;
84
+ this->hllByteArr[byteIdx] = insert & 0xFF;
85
+ this->hllByteArr[byteIdx + 1] = (insert & 0xFF00) >> 8;
86
+ }
87
+
88
+ template<typename A>
89
+ int Hll6Array<A>::getHllByteArrBytes() const {
90
+ return this->hll6ArrBytes(this->lgConfigK);
91
+ }
92
+
93
+ template<typename A>
94
+ HllSketchImpl<A>* Hll6Array<A>::couponUpdate(const int coupon) {
95
+ internalCouponUpdate(coupon);
96
+ return this;
97
+ }
98
+
99
+ template<typename A>
100
+ void Hll6Array<A>::internalCouponUpdate(const int coupon) {
101
+ const int configKmask = (1 << this->lgConfigK) - 1;
102
+ const int slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
103
+ const int newVal = HllUtil<A>::getValue(coupon);
104
+
105
+ const int curVal = getSlot(slotNo);
106
+ if (newVal > curVal) {
107
+ putSlot(slotNo, newVal);
108
+ this->hipAndKxQIncrementalUpdate(curVal, newVal);
109
+ if (curVal == 0) {
110
+ this->numAtCurMin--; // interpret numAtCurMin as num zeros
111
+ }
112
+ }
113
+ }
114
+
115
+ template<typename A>
116
+ void Hll6Array<A>::mergeHll(const HllArray<A>& src) {
117
+ for (auto coupon: src) {
118
+ internalCouponUpdate(coupon);
119
+ }
120
+ }
121
+
122
+ }
123
+
124
+ #endif // _HLL6ARRAY_INTERNAL_HPP_
@@ -0,0 +1,55 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLL6ARRAY_HPP_
21
+ #define _HLL6ARRAY_HPP_
22
+
23
+ #include "HllArray.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ template<typename A>
28
+ class Hll6Iterator;
29
+
30
+ template<typename A>
31
+ class Hll6Array final : public HllArray<A> {
32
+ public:
33
+ explicit Hll6Array(int lgConfigK, bool startFullSize);
34
+ explicit Hll6Array(const Hll6Array<A>& that);
35
+
36
+ virtual ~Hll6Array();
37
+ virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
38
+
39
+ virtual Hll6Array* copy() const;
40
+
41
+ inline uint8_t getSlot(int slotNo) const;
42
+ inline void putSlot(int slotNo, uint8_t value);
43
+
44
+ virtual HllSketchImpl<A>* couponUpdate(int coupon) final;
45
+ void mergeHll(const HllArray<A>& src);
46
+
47
+ virtual int getHllByteArrBytes() const;
48
+
49
+ private:
50
+ void internalCouponUpdate(int coupon);
51
+ };
52
+
53
+ }
54
+
55
+ #endif /* _HLL6ARRAY_HPP_ */
@@ -0,0 +1,158 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLL8ARRAY_INTERNAL_HPP_
21
+ #define _HLL8ARRAY_INTERNAL_HPP_
22
+
23
+ #include "Hll8Array.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ template<typename A>
28
+ Hll8Array<A>::Hll8Array(const int lgConfigK, const bool startFullSize) :
29
+ HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize) {
30
+ const int numBytes = this->hll8ArrBytes(lgConfigK);
31
+ typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
32
+ this->hllByteArr = uint8Alloc().allocate(numBytes);
33
+ std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
34
+ }
35
+
36
+ template<typename A>
37
+ Hll8Array<A>::Hll8Array(const Hll8Array<A>& that) :
38
+ HllArray<A>(that)
39
+ {
40
+ // can determine hllByteArr size in parent class, no need to allocate here
41
+ }
42
+
43
+ template<typename A>
44
+ Hll8Array<A>::~Hll8Array() {
45
+ // hllByteArr deleted in parent
46
+ }
47
+
48
+ template<typename A>
49
+ std::function<void(HllSketchImpl<A>*)> Hll8Array<A>::get_deleter() const {
50
+ return [](HllSketchImpl<A>* ptr) {
51
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
52
+ Hll8Array<A>* hll = static_cast<Hll8Array<A>*>(ptr);
53
+ hll->~Hll8Array();
54
+ hll8Alloc().deallocate(hll, 1);
55
+ };
56
+ }
57
+
58
+ template<typename A>
59
+ Hll8Array<A>* Hll8Array<A>::copy() const {
60
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
61
+ return new (hll8Alloc().allocate(1)) Hll8Array<A>(*this);
62
+ }
63
+
64
+ template<typename A>
65
+ uint8_t Hll8Array<A>::getSlot(const int slotNo) const {
66
+ return this->hllByteArr[slotNo];
67
+ }
68
+
69
+ template<typename A>
70
+ void Hll8Array<A>::putSlot(const int slotNo, uint8_t value) {
71
+ this->hllByteArr[slotNo] = value;
72
+ }
73
+
74
+ template<typename A>
75
+ int Hll8Array<A>::getHllByteArrBytes() const {
76
+ return this->hll8ArrBytes(this->lgConfigK);
77
+ }
78
+
79
+ template<typename A>
80
+ HllSketchImpl<A>* Hll8Array<A>::couponUpdate(int coupon) {
81
+ internalCouponUpdate(coupon);
82
+ return this;
83
+ }
84
+
85
+ template<typename A>
86
+ void Hll8Array<A>::internalCouponUpdate(int coupon) {
87
+ const int configKmask = (1 << this->lgConfigK) - 1;
88
+ const int slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
89
+ const int newVal = HllUtil<A>::getValue(coupon);
90
+
91
+ const int curVal = getSlot(slotNo);
92
+ if (newVal > curVal) {
93
+ putSlot(slotNo, newVal);
94
+ this->hipAndKxQIncrementalUpdate(curVal, newVal);
95
+ if (curVal == 0) {
96
+ this->numAtCurMin--; // interpret numAtCurMin as num zeros
97
+ }
98
+ }
99
+ }
100
+
101
+ template<typename A>
102
+ void Hll8Array<A>::mergeList(const CouponList<A>& src) {
103
+ for (auto coupon: src) {
104
+ internalCouponUpdate(coupon);
105
+ }
106
+ }
107
+
108
+ template<typename A>
109
+ void Hll8Array<A>::mergeHll(const HllArray<A>& src) {
110
+ // at this point src_k >= dst_k
111
+ const int src_k = 1 << src.getLgConfigK();
112
+ const int dst_mask = (1 << this->getLgConfigK()) - 1;
113
+ // duplication below is to avoid a virtual method call in a loop
114
+ if (src.getTgtHllType() == target_hll_type::HLL_8) {
115
+ for (int i = 0; i < src_k; i++) {
116
+ const uint8_t new_v = static_cast<const Hll8Array<A>&>(src).getSlot(i);
117
+ const int j = i & dst_mask;
118
+ const uint8_t old_v = this->hllByteArr[j];
119
+ if (new_v > old_v) {
120
+ this->hllByteArr[j] = new_v;
121
+ this->hipAndKxQIncrementalUpdate(old_v, new_v);
122
+ if (old_v == 0) {
123
+ this->numAtCurMin--;
124
+ }
125
+ }
126
+ }
127
+ } else if (src.getTgtHllType() == target_hll_type::HLL_6) {
128
+ for (int i = 0; i < src_k; i++) {
129
+ const uint8_t new_v = static_cast<const Hll6Array<A>&>(src).getSlot(i);
130
+ const int j = i & dst_mask;
131
+ const uint8_t old_v = this->hllByteArr[j];
132
+ if (new_v > old_v) {
133
+ this->hllByteArr[j] = new_v;
134
+ this->hipAndKxQIncrementalUpdate(old_v, new_v);
135
+ if (old_v == 0) {
136
+ this->numAtCurMin--;
137
+ }
138
+ }
139
+ }
140
+ } else { // HLL_4
141
+ for (int i = 0; i < src_k; i++) {
142
+ const uint8_t new_v = static_cast<const Hll4Array<A>&>(src).get_value(i);
143
+ const int j = i & dst_mask;
144
+ const uint8_t old_v = this->hllByteArr[j];
145
+ if (new_v > old_v) {
146
+ this->hllByteArr[j] = new_v;
147
+ this->hipAndKxQIncrementalUpdate(old_v, new_v);
148
+ if (old_v == 0) {
149
+ this->numAtCurMin--;
150
+ }
151
+ }
152
+ }
153
+ }
154
+ }
155
+
156
+ }
157
+
158
+ #endif // _HLL8ARRAY_INTERNAL_HPP_
@@ -0,0 +1,56 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLL8ARRAY_HPP_
21
+ #define _HLL8ARRAY_HPP_
22
+
23
+ #include "HllArray.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ template<typename A>
28
+ class Hll8Iterator;
29
+
30
+ template<typename A>
31
+ class Hll8Array final : public HllArray<A> {
32
+ public:
33
+ explicit Hll8Array(int lgConfigK, bool startFullSize);
34
+ explicit Hll8Array(const Hll8Array& that);
35
+
36
+ virtual ~Hll8Array();
37
+ virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
38
+
39
+ virtual Hll8Array<A>* copy() const;
40
+
41
+ inline uint8_t getSlot(int slotNo) const;
42
+ inline void putSlot(int slotNo, uint8_t value);
43
+
44
+ virtual HllSketchImpl<A>* couponUpdate(int coupon) final;
45
+ void mergeList(const CouponList<A>& src);
46
+ void mergeHll(const HllArray<A>& src);
47
+
48
+ virtual int getHllByteArrBytes() const;
49
+
50
+ private:
51
+ inline void internalCouponUpdate(int coupon);
52
+ };
53
+
54
+ }
55
+
56
+ #endif /* _HLL8ARRAY_HPP_ */
@@ -0,0 +1,706 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLLARRAY_INTERNAL_HPP_
21
+ #define _HLLARRAY_INTERNAL_HPP_
22
+
23
+ #include "HllArray.hpp"
24
+ #include "HllUtil.hpp"
25
+ #include "HarmonicNumbers.hpp"
26
+ #include "CubicInterpolation.hpp"
27
+ #include "CompositeInterpolationXTable.hpp"
28
+ #include "CouponList.hpp"
29
+ #include "inv_pow2_table.hpp"
30
+ #include <cstring>
31
+ #include <cmath>
32
+ #include <stdexcept>
33
+ #include <string>
34
+
35
+ namespace datasketches {
36
+
37
+ template<typename A>
38
+ HllArray<A>::HllArray(const int lgConfigK, const target_hll_type tgtHllType, bool startFullSize)
39
+ : HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize) {
40
+ hipAccum = 0.0;
41
+ kxq0 = 1 << lgConfigK;
42
+ kxq1 = 0.0;
43
+ curMin = 0;
44
+ numAtCurMin = 1 << lgConfigK;
45
+ oooFlag = false;
46
+ hllByteArr = nullptr; // allocated in derived class
47
+ }
48
+
49
+ template<typename A>
50
+ HllArray<A>::HllArray(const HllArray<A>& that):
51
+ HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, hll_mode::HLL, that.startFullSize),
52
+ hipAccum(that.hipAccum),
53
+ kxq0(that.kxq0),
54
+ kxq1(that.kxq1),
55
+ hllByteArr(nullptr),
56
+ curMin(that.curMin),
57
+ numAtCurMin(that.numAtCurMin),
58
+ oooFlag(that.oooFlag)
59
+ {
60
+ const int arrayLen = that.getHllByteArrBytes();
61
+ typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
62
+ hllByteArr = uint8Alloc().allocate(arrayLen);
63
+ std::copy(that.hllByteArr, that.hllByteArr + arrayLen, hllByteArr);
64
+ }
65
+
66
+ template<typename A>
67
+ HllArray<A>::~HllArray() {
68
+ // need to determine number of bytes to deallocate
69
+ int hllArrBytes = 0;
70
+ if (this->tgtHllType == target_hll_type::HLL_4) {
71
+ hllArrBytes = hll4ArrBytes(this->lgConfigK);
72
+ } else if (this->tgtHllType == target_hll_type::HLL_6) {
73
+ hllArrBytes = hll6ArrBytes(this->lgConfigK);
74
+ } else { // tgtHllType == HLL_8
75
+ hllArrBytes = hll8ArrBytes(this->lgConfigK);
76
+ }
77
+ typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> uint8Alloc;
78
+ uint8Alloc().deallocate(hllByteArr, hllArrBytes);
79
+ }
80
+
81
+ template<typename A>
82
+ HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
83
+ if (tgtHllType == this->getTgtHllType()) {
84
+ return static_cast<HllArray*>(copy());
85
+ }
86
+ if (tgtHllType == target_hll_type::HLL_4) {
87
+ return HllSketchImplFactory<A>::convertToHll4(*this);
88
+ } else if (tgtHllType == target_hll_type::HLL_6) {
89
+ return HllSketchImplFactory<A>::convertToHll6(*this);
90
+ } else { // tgtHllType == HLL_8
91
+ return HllSketchImplFactory<A>::convertToHll8(*this);
92
+ }
93
+ }
94
+
95
+ template<typename A>
96
+ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len) {
97
+ if (len < HllUtil<A>::HLL_BYTE_ARR_START) {
98
+ throw std::out_of_range("Input data length insufficient to hold HLL array");
99
+ }
100
+
101
+ const uint8_t* data = static_cast<const uint8_t*>(bytes);
102
+ if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HLL_PREINTS) {
103
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
104
+ }
105
+ if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
106
+ throw std::invalid_argument("Wrong ser ver in input stream");
107
+ }
108
+ if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
109
+ throw std::invalid_argument("Input array is not an HLL sketch");
110
+ }
111
+
112
+ const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
113
+ if (mode != HLL) {
114
+ throw std::invalid_argument("Calling HLL array construtor with non-HLL mode data");
115
+ }
116
+
117
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
118
+ const bool oooFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
119
+ const bool comapctFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
120
+ const bool startFullSizeFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::FULL_SIZE_FLAG_MASK) ? true : false);
121
+
122
+ const int lgK = (int) data[HllUtil<A>::LG_K_BYTE];
123
+ const int curMin = (int) data[HllUtil<A>::HLL_CUR_MIN_BYTE];
124
+
125
+ const int arrayBytes = hllArrBytes(tgtHllType, lgK);
126
+ if (len < static_cast<size_t>(HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes)) {
127
+ throw std::out_of_range("Input array too small to hold sketch image");
128
+ }
129
+
130
+ double hip, kxq0, kxq1;
131
+ std::memcpy(&hip, data + HllUtil<A>::HIP_ACCUM_DOUBLE, sizeof(double));
132
+ std::memcpy(&kxq0, data + HllUtil<A>::KXQ0_DOUBLE, sizeof(double));
133
+ std::memcpy(&kxq1, data + HllUtil<A>::KXQ1_DOUBLE, sizeof(double));
134
+
135
+ int numAtCurMin, auxCount;
136
+ std::memcpy(&numAtCurMin, data + HllUtil<A>::CUR_MIN_COUNT_INT, sizeof(int));
137
+ std::memcpy(&auxCount, data + HllUtil<A>::AUX_COUNT_INT, sizeof(int));
138
+
139
+ AuxHashMap<A>* auxHashMap = nullptr;
140
+ typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
141
+ aux_hash_map_ptr aux_ptr;
142
+ if (auxCount > 0) { // necessarily TgtHllType == HLL_4
143
+ int auxLgIntArrSize = (int) data[4];
144
+ const size_t offset = HllUtil<A>::HLL_BYTE_ARR_START + arrayBytes;
145
+ const uint8_t* auxDataStart = data + offset;
146
+ auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag);
147
+ aux_ptr = aux_hash_map_ptr(auxHashMap, auxHashMap->make_deleter());
148
+ }
149
+
150
+ HllArray<A>* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
151
+ sketch->putCurMin(curMin);
152
+ sketch->putOutOfOrderFlag(oooFlag);
153
+ if (!oooFlag) sketch->putHipAccum(hip);
154
+ sketch->putKxQ0(kxq0);
155
+ sketch->putKxQ1(kxq1);
156
+ sketch->putNumAtCurMin(numAtCurMin);
157
+
158
+ std::memcpy(sketch->hllByteArr, data + HllUtil<A>::HLL_BYTE_ARR_START, arrayBytes);
159
+
160
+ if (auxHashMap != nullptr)
161
+ ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
162
+
163
+ aux_ptr.release();
164
+ return sketch;
165
+ }
166
+
167
+ template<typename A>
168
+ HllArray<A>* HllArray<A>::newHll(std::istream& is) {
169
+ uint8_t listHeader[8];
170
+ is.read((char*)listHeader, 8 * sizeof(uint8_t));
171
+
172
+ if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HLL_PREINTS) {
173
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
174
+ }
175
+ if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
176
+ throw std::invalid_argument("Wrong ser ver in input stream");
177
+ }
178
+ if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
179
+ throw std::invalid_argument("Input stream is not an HLL sketch");
180
+ }
181
+
182
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
183
+ if (mode != HLL) {
184
+ throw std::invalid_argument("Calling HLL construtor with non-HLL mode data");
185
+ }
186
+
187
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
188
+ const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
189
+ const bool comapctFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
190
+ const bool startFullSizeFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::FULL_SIZE_FLAG_MASK) ? true : false);
191
+
192
+ const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE];
193
+ const int curMin = (int) listHeader[HllUtil<A>::HLL_CUR_MIN_BYTE];
194
+
195
+ HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag);
196
+ typedef std::unique_ptr<HllArray<A>, std::function<void(HllSketchImpl<A>*)>> hll_array_ptr;
197
+ hll_array_ptr sketch_ptr(sketch, sketch->get_deleter());
198
+ sketch->putCurMin(curMin);
199
+ sketch->putOutOfOrderFlag(oooFlag);
200
+
201
+ double hip, kxq0, kxq1;
202
+ is.read((char*)&hip, sizeof(hip));
203
+ is.read((char*)&kxq0, sizeof(kxq0));
204
+ is.read((char*)&kxq1, sizeof(kxq1));
205
+ if (!oooFlag) sketch->putHipAccum(hip);
206
+ sketch->putKxQ0(kxq0);
207
+ sketch->putKxQ1(kxq1);
208
+
209
+ int numAtCurMin, auxCount;
210
+ is.read((char*)&numAtCurMin, sizeof(numAtCurMin));
211
+ is.read((char*)&auxCount, sizeof(auxCount));
212
+ sketch->putNumAtCurMin(numAtCurMin);
213
+
214
+ is.read((char*)sketch->hllByteArr, sketch->getHllByteArrBytes());
215
+
216
+ if (auxCount > 0) { // necessarily TgtHllType == HLL_4
217
+ int auxLgIntArrSize = listHeader[4];
218
+ AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag);
219
+ ((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
220
+ }
221
+
222
+ if (!is.good())
223
+ throw std::runtime_error("error reading from std::istream");
224
+
225
+ return sketch_ptr.release();
226
+ }
227
+
228
+ template<typename A>
229
+ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) const {
230
+ const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
231
+ vector_u8<A> byteArr(sketchSizeBytes);
232
+ uint8_t* bytes = byteArr.data() + header_size_bytes;
233
+ AuxHashMap<A>* auxHashMap = getAuxHashMap();
234
+
235
+ bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
236
+ bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
237
+ bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
238
+ bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
239
+ bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(auxHashMap == nullptr ? 0 : auxHashMap->getLgAuxArrInts());
240
+ bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
241
+ bytes[HllUtil<A>::HLL_CUR_MIN_BYTE] = static_cast<uint8_t>(curMin);
242
+ bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
243
+
244
+ std::memcpy(bytes + HllUtil<A>::HIP_ACCUM_DOUBLE, &hipAccum, sizeof(double));
245
+ std::memcpy(bytes + HllUtil<A>::KXQ0_DOUBLE, &kxq0, sizeof(double));
246
+ std::memcpy(bytes + HllUtil<A>::KXQ1_DOUBLE, &kxq1, sizeof(double));
247
+ std::memcpy(bytes + HllUtil<A>::CUR_MIN_COUNT_INT, &numAtCurMin, sizeof(int));
248
+ const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
249
+ std::memcpy(bytes + HllUtil<A>::AUX_COUNT_INT, &auxCount, sizeof(int));
250
+
251
+ const int hllByteArrBytes = getHllByteArrBytes();
252
+ std::memcpy(bytes + getMemDataStart(), hllByteArr, hllByteArrBytes);
253
+
254
+ // aux map if HLL_4
255
+ if (this->tgtHllType == HLL_4) {
256
+ bytes += getMemDataStart() + hllByteArrBytes; // start of auxHashMap
257
+ if (auxHashMap != nullptr) {
258
+ if (compact) {
259
+ for (uint32_t coupon: *auxHashMap) {
260
+ std::memcpy(bytes, &coupon, sizeof(coupon));
261
+ bytes += sizeof(coupon);
262
+ }
263
+ } else {
264
+ std::memcpy(bytes, auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
265
+ }
266
+ } else if (!compact) {
267
+ // if updatable, we write even if currently unused so the binary can be wrapped
268
+ int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK];
269
+ std::fill_n(bytes, auxBytes, 0);
270
+ }
271
+ }
272
+
273
+ return byteArr;
274
+ }
275
+
276
+ template<typename A>
277
+ void HllArray<A>::serialize(std::ostream& os, const bool compact) const {
278
+ // header
279
+ const uint8_t preInts(getPreInts());
280
+ os.write((char*)&preInts, sizeof(preInts));
281
+ const uint8_t serialVersion(HllUtil<A>::SER_VER);
282
+ os.write((char*)&serialVersion, sizeof(serialVersion));
283
+ const uint8_t familyId(HllUtil<A>::FAMILY_ID);
284
+ os.write((char*)&familyId, sizeof(familyId));
285
+ const uint8_t lgKByte((uint8_t) this->lgConfigK);
286
+ os.write((char*)&lgKByte, sizeof(lgKByte));
287
+
288
+ AuxHashMap<A>* auxHashMap = getAuxHashMap();
289
+ uint8_t lgArrByte(0);
290
+ if (auxHashMap != nullptr) {
291
+ lgArrByte = auxHashMap->getLgAuxArrInts();
292
+ }
293
+ os.write((char*)&lgArrByte, sizeof(lgArrByte));
294
+
295
+ const uint8_t flagsByte(this->makeFlagsByte(compact));
296
+ os.write((char*)&flagsByte, sizeof(flagsByte));
297
+ const uint8_t curMinByte((uint8_t) curMin);
298
+ os.write((char*)&curMinByte, sizeof(curMinByte));
299
+ const uint8_t modeByte(this->makeModeByte());
300
+ os.write((char*)&modeByte, sizeof(modeByte));
301
+
302
+ // estimator data
303
+ os.write((char*)&hipAccum, sizeof(hipAccum));
304
+ os.write((char*)&kxq0, sizeof(kxq0));
305
+ os.write((char*)&kxq1, sizeof(kxq1));
306
+
307
+ // array data
308
+ os.write((char*)&numAtCurMin, sizeof(numAtCurMin));
309
+
310
+ const int auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
311
+ os.write((char*)&auxCount, sizeof(auxCount));
312
+ os.write((char*)hllByteArr, getHllByteArrBytes());
313
+
314
+ // aux map if HLL_4
315
+ if (this->tgtHllType == HLL_4) {
316
+ if (auxHashMap != nullptr) {
317
+ if (compact) {
318
+ for (uint32_t coupon: *auxHashMap) {
319
+ os.write((char*)&coupon, sizeof(coupon));
320
+ }
321
+ } else {
322
+ os.write((char*)auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
323
+ }
324
+ } else if (!compact) {
325
+ // if updatable, we write even if currently unused so the binary can be wrapped
326
+ int auxBytes = 4 << HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK];
327
+ std::fill_n(std::ostreambuf_iterator<char>(os), auxBytes, 0);
328
+ }
329
+ }
330
+ }
331
+
332
+ template<typename A>
333
+ double HllArray<A>::getEstimate() const {
334
+ if (oooFlag) {
335
+ return getCompositeEstimate();
336
+ }
337
+ return getHipAccum();
338
+ }
339
+
340
+ // HLL UPPER AND LOWER BOUNDS
341
+
342
+ /*
343
+ * The upper and lower bounds are not symmetric and thus are treated slightly differently.
344
+ * For the lower bound, when the unique count is <= k, LB >= numNonZeros, where
345
+ * numNonZeros = k - numAtCurMin AND curMin == 0.
346
+ *
347
+ * For HLL6 and HLL8, curMin is always 0 and numAtCurMin is initialized to k and is decremented
348
+ * down for each valid update until it reaches 0, where it stays. Thus, for these two
349
+ * isomorphs, when numAtCurMin = 0, means the true curMin is > 0 and the unique count must be
350
+ * greater than k.
351
+ *
352
+ * HLL4 always maintains both curMin and numAtCurMin dynamically. Nonetheless, the rules for
353
+ * the very small values <= k where curMin = 0 still apply.
354
+ */
355
+ template<typename A>
356
+ double HllArray<A>::getLowerBound(const int numStdDev) const {
357
+ HllUtil<A>::checkNumStdDev(numStdDev);
358
+ const int configK = 1 << this->lgConfigK;
359
+ const double numNonZeros = ((curMin == 0) ? (configK - numAtCurMin) : configK);
360
+
361
+ double estimate;
362
+ double rseFactor;
363
+ if (oooFlag) {
364
+ estimate = getCompositeEstimate();
365
+ rseFactor = HllUtil<A>::HLL_NON_HIP_RSE_FACTOR;
366
+ } else {
367
+ estimate = hipAccum;
368
+ rseFactor = HllUtil<A>::HLL_HIP_RSE_FACTOR;
369
+ }
370
+
371
+ double relErr;
372
+ if (this->lgConfigK > 12) {
373
+ relErr = (numStdDev * rseFactor) / sqrt(configK);
374
+ } else {
375
+ relErr = HllUtil<A>::getRelErr(false, oooFlag, this->lgConfigK, numStdDev);
376
+ }
377
+ return fmax(estimate / (1.0 + relErr), numNonZeros);
378
+ }
379
+
380
+ template<typename A>
381
+ double HllArray<A>::getUpperBound(const int numStdDev) const {
382
+ HllUtil<A>::checkNumStdDev(numStdDev);
383
+ const int configK = 1 << this->lgConfigK;
384
+
385
+ double estimate;
386
+ double rseFactor;
387
+ if (oooFlag) {
388
+ estimate = getCompositeEstimate();
389
+ rseFactor = HllUtil<A>::HLL_NON_HIP_RSE_FACTOR;
390
+ } else {
391
+ estimate = hipAccum;
392
+ rseFactor = HllUtil<A>::HLL_HIP_RSE_FACTOR;
393
+ }
394
+
395
+ double relErr;
396
+ if (this->lgConfigK > 12) {
397
+ relErr = (-1.0) * (numStdDev * rseFactor) / sqrt(configK);
398
+ } else {
399
+ relErr = HllUtil<A>::getRelErr(true, oooFlag, this->lgConfigK, numStdDev);
400
+ }
401
+ return estimate / (1.0 + relErr);
402
+ }
403
+
404
+ /**
405
+ * This is the (non-HIP) estimator.
406
+ * It is called "composite" because multiple estimators are pasted together.
407
+ * @param absHllArr an instance of the AbstractHllArray class.
408
+ * @return the composite estimate
409
+ */
410
+ // Original C: again-two-registers.c hhb_get_composite_estimate L1489
411
+ template<typename A>
412
+ double HllArray<A>::getCompositeEstimate() const {
413
+ const double rawEst = getHllRawEstimate(this->lgConfigK, kxq0 + kxq1);
414
+
415
+ const double* xArr = CompositeInterpolationXTable<A>::get_x_arr(this->lgConfigK);
416
+ const int xArrLen = CompositeInterpolationXTable<A>::get_x_arr_length();
417
+ const double yStride = CompositeInterpolationXTable<A>::get_y_stride(this->lgConfigK);
418
+
419
+ if (rawEst < xArr[0]) {
420
+ return 0;
421
+ }
422
+
423
+ const int xArrLenM1 = xArrLen - 1;
424
+
425
+ if (rawEst > xArr[xArrLenM1]) {
426
+ double finalY = yStride * xArrLenM1;
427
+ double factor = finalY / xArr[xArrLenM1];
428
+ return rawEst * factor;
429
+ }
430
+
431
+ double adjEst = CubicInterpolation<A>::usingXArrAndYStride(xArr, xArrLen, yStride, rawEst);
432
+
433
+ // We need to completely avoid the linear_counting estimator if it might have a crazy value.
434
+ // Empirical evidence suggests that the threshold 3*k will keep us safe if 2^4 <= k <= 2^21.
435
+
436
+ if (adjEst > (3 << this->lgConfigK)) { return adjEst; }
437
+
438
+ const double linEst =
439
+ getHllBitMapEstimate(this->lgConfigK, curMin, numAtCurMin);
440
+
441
+ // Bias is created when the value of an estimator is compared with a threshold to decide whether
442
+ // to use that estimator or a different one.
443
+ // We conjecture that less bias is created when the average of the two estimators
444
+ // is compared with the threshold. Empirical measurements support this conjecture.
445
+
446
+ const double avgEst = (adjEst + linEst) / 2.0;
447
+
448
+ // The following constants comes from empirical measurements of the crossover point
449
+ // between the average error of the linear estimator and the adjusted hll estimator
450
+ double crossOver = 0.64;
451
+ if (this->lgConfigK == 4) { crossOver = 0.718; }
452
+ else if (this->lgConfigK == 5) { crossOver = 0.672; }
453
+
454
+ return (avgEst > (crossOver * (1 << this->lgConfigK))) ? adjEst : linEst;
455
+ }
456
+
457
+ template<typename A>
458
+ double HllArray<A>::getKxQ0() const {
459
+ return kxq0;
460
+ }
461
+
462
+ template<typename A>
463
+ double HllArray<A>::getKxQ1() const {
464
+ return kxq1;
465
+ }
466
+
467
+ template<typename A>
468
+ double HllArray<A>::getHipAccum() const {
469
+ return hipAccum;
470
+ }
471
+
472
+ template<typename A>
473
+ int HllArray<A>::getCurMin() const {
474
+ return curMin;
475
+ }
476
+
477
+ template<typename A>
478
+ int HllArray<A>::getNumAtCurMin() const {
479
+ return numAtCurMin;
480
+ }
481
+
482
+ template<typename A>
483
+ void HllArray<A>::putKxQ0(const double kxq0) {
484
+ this->kxq0 = kxq0;
485
+ }
486
+
487
+ template<typename A>
488
+ void HllArray<A>::putKxQ1(const double kxq1) {
489
+ this->kxq1 = kxq1;
490
+ }
491
+
492
+ template<typename A>
493
+ void HllArray<A>::putHipAccum(const double hipAccum) {
494
+ this->hipAccum = hipAccum;
495
+ }
496
+
497
+ template<typename A>
498
+ void HllArray<A>::putCurMin(const int curMin) {
499
+ this->curMin = curMin;
500
+ }
501
+
502
+ template<typename A>
503
+ void HllArray<A>::putNumAtCurMin(const int numAtCurMin) {
504
+ this->numAtCurMin = numAtCurMin;
505
+ }
506
+
507
+ template<typename A>
508
+ void HllArray<A>::decNumAtCurMin() {
509
+ --numAtCurMin;
510
+ }
511
+
512
+ template<typename A>
513
+ void HllArray<A>::addToHipAccum(const double delta) {
514
+ hipAccum += delta;
515
+ }
516
+
517
+ template<typename A>
518
+ bool HllArray<A>::isCompact() const {
519
+ return false;
520
+ }
521
+
522
+ template<typename A>
523
+ bool HllArray<A>::isEmpty() const {
524
+ const int configK = 1 << this->lgConfigK;
525
+ return (getCurMin() == 0) && (getNumAtCurMin() == configK);
526
+ }
527
+
528
+ template<typename A>
529
+ void HllArray<A>::putOutOfOrderFlag(bool flag) {
530
+ oooFlag = flag;
531
+ }
532
+
533
+ template<typename A>
534
+ bool HllArray<A>::isOutOfOrderFlag() const {
535
+ return oooFlag;
536
+ }
537
+
538
+ template<typename A>
539
+ int HllArray<A>::hllArrBytes(target_hll_type tgtHllType, int lgConfigK) {
540
+ switch (tgtHllType) {
541
+ case HLL_4:
542
+ return hll4ArrBytes(lgConfigK);
543
+ case HLL_6:
544
+ return hll6ArrBytes(lgConfigK);
545
+ case HLL_8:
546
+ return hll8ArrBytes(lgConfigK);
547
+ default:
548
+ throw std::invalid_argument("Invalid target HLL type");
549
+ }
550
+ }
551
+
552
+ template<typename A>
553
+ int HllArray<A>::hll4ArrBytes(const int lgConfigK) {
554
+ return 1 << (lgConfigK - 1);
555
+ }
556
+
557
+ template<typename A>
558
+ int HllArray<A>::hll6ArrBytes(const int lgConfigK) {
559
+ const int numSlots = 1 << lgConfigK;
560
+ return ((numSlots * 3) >> 2) + 1;
561
+ }
562
+
563
+ template<typename A>
564
+ int HllArray<A>::hll8ArrBytes(const int lgConfigK) {
565
+ return 1 << lgConfigK;
566
+ }
567
+
568
+ template<typename A>
569
+ int HllArray<A>::getMemDataStart() const {
570
+ return HllUtil<A>::HLL_BYTE_ARR_START;
571
+ }
572
+
573
+ template<typename A>
574
+ int HllArray<A>::getUpdatableSerializationBytes() const {
575
+ return HllUtil<A>::HLL_BYTE_ARR_START + getHllByteArrBytes();
576
+ }
577
+
578
+ template<typename A>
579
+ int HllArray<A>::getCompactSerializationBytes() const {
580
+ AuxHashMap<A>* auxHashMap = getAuxHashMap();
581
+ const int auxCountBytes = ((auxHashMap == nullptr) ? 0 : auxHashMap->getCompactSizeBytes());
582
+ return HllUtil<A>::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxCountBytes;
583
+ }
584
+
585
+ template<typename A>
586
+ int HllArray<A>::getPreInts() const {
587
+ return HllUtil<A>::HLL_PREINTS;
588
+ }
589
+
590
+ template<typename A>
591
+ AuxHashMap<A>* HllArray<A>::getAuxHashMap() const {
592
+ return nullptr;
593
+ }
594
+
595
+ template<typename A>
596
+ void HllArray<A>::hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue) {
597
+ const int configK = 1 << this->getLgConfigK();
598
+ // update hip BEFORE updating kxq
599
+ if (!oooFlag) hipAccum += configK / (kxq0 + kxq1);
600
+ // update kxq0 and kxq1; subtract first, then add
601
+ if (oldValue < 32) { kxq0 -= INVERSE_POWERS_OF_2[oldValue]; }
602
+ else { kxq1 -= INVERSE_POWERS_OF_2[oldValue]; }
603
+ if (newValue < 32) { kxq0 += INVERSE_POWERS_OF_2[newValue]; }
604
+ else { kxq1 += INVERSE_POWERS_OF_2[newValue]; }
605
+ }
606
+
607
+ /**
608
+ * Estimator when N is small, roughly less than k log(k).
609
+ * Refer to Wikipedia: Coupon Collector Problem
610
+ * @return the very low range estimate
611
+ */
612
+ //In C: again-two-registers.c hhb_get_improved_linear_counting_estimate L1274
613
+ template<typename A>
614
+ double HllArray<A>::getHllBitMapEstimate(const int lgConfigK, const int curMin, const int numAtCurMin) const {
615
+ const int configK = 1 << lgConfigK;
616
+ const int numUnhitBuckets = ((curMin == 0) ? numAtCurMin : 0);
617
+
618
+ //This will eventually go away.
619
+ if (numUnhitBuckets == 0) {
620
+ return configK * log(configK / 0.5);
621
+ }
622
+
623
+ const int numHitBuckets = configK - numUnhitBuckets;
624
+ return HarmonicNumbers<A>::getBitMapEstimate(configK, numHitBuckets);
625
+ }
626
+
627
+ //In C: again-two-registers.c hhb_get_raw_estimate L1167
628
+ template<typename A>
629
+ double HllArray<A>::getHllRawEstimate(const int lgConfigK, const double kxqSum) const {
630
+ const int configK = 1 << lgConfigK;
631
+ double correctionFactor;
632
+ if (lgConfigK == 4) { correctionFactor = 0.673; }
633
+ else if (lgConfigK == 5) { correctionFactor = 0.697; }
634
+ else if (lgConfigK == 6) { correctionFactor = 0.709; }
635
+ else { correctionFactor = 0.7213 / (1.0 + (1.079 / configK)); }
636
+ const double hyperEst = (correctionFactor * configK * configK) / kxqSum;
637
+ return hyperEst;
638
+ }
639
+
640
+ template<typename A>
641
+ typename HllArray<A>::const_iterator HllArray<A>::begin(bool all) const {
642
+ return const_iterator(hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, nullptr, 0, all);
643
+ }
644
+
645
+ template<typename A>
646
+ typename HllArray<A>::const_iterator HllArray<A>::end() const {
647
+ return const_iterator(hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, nullptr, 0, false);
648
+ }
649
+
650
+ template<typename A>
651
+ HllArray<A>::const_iterator::const_iterator(const uint8_t* array, size_t array_size, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all):
652
+ array(array), array_size(array_size), index(index), hll_type(hll_type), exceptions(exceptions), offset(offset), all(all)
653
+ {
654
+ while (this->index < array_size) {
655
+ value = get_value(array, this->index, hll_type, exceptions, offset);
656
+ if (all || value != HllUtil<A>::EMPTY) break;
657
+ this->index++;
658
+ }
659
+ }
660
+
661
+ template<typename A>
662
+ typename HllArray<A>::const_iterator& HllArray<A>::const_iterator::operator++() {
663
+ while (++index < array_size) {
664
+ value = get_value(array, index, hll_type, exceptions, offset);
665
+ if (all || value != HllUtil<A>::EMPTY) break;
666
+ }
667
+ return *this;
668
+ }
669
+
670
+ template<typename A>
671
+ bool HllArray<A>::const_iterator::operator!=(const const_iterator& other) const {
672
+ return index != other.index;
673
+ }
674
+
675
+ template<typename A>
676
+ uint32_t HllArray<A>::const_iterator::operator*() const {
677
+ return HllUtil<A>::pair(index, value);
678
+ }
679
+
680
+ template<typename A>
681
+ uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset) {
682
+ if (hll_type == target_hll_type::HLL_4) {
683
+ uint8_t value = array[index >> 1];
684
+ if ((index & 1) > 0) { // odd
685
+ value >>= 4;
686
+ } else {
687
+ value &= HllUtil<A>::loNibbleMask;
688
+ }
689
+ if (value == HllUtil<A>::AUX_TOKEN) { // exception
690
+ return exceptions->mustFindValueFor(index);
691
+ }
692
+ return value + offset;
693
+ } else if (hll_type == target_hll_type::HLL_6) {
694
+ const int start_bit = index * 6;
695
+ const int shift = start_bit & 0x7;
696
+ const int byte_idx = start_bit >> 3;
697
+ const uint16_t two_byte_val = (array[byte_idx + 1] << 8) | array[byte_idx];
698
+ return (two_byte_val >> shift) & HllUtil<A>::VAL_MASK_6;
699
+ }
700
+ // HLL_8
701
+ return array[index];
702
+ }
703
+
704
+ }
705
+
706
+ #endif // _HLLARRAY_INTERNAL_HPP_