datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,149 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLLSKETCHIMPL_INTERNAL_HPP_
21
+ #define _HLLSKETCHIMPL_INTERNAL_HPP_
22
+
23
+ #include "HllSketchImpl.hpp"
24
+ #include "HllSketchImplFactory.hpp"
25
+
26
+ namespace datasketches {
27
+
28
+ template<typename A>
29
+ HllSketchImpl<A>::HllSketchImpl(const int lgConfigK, const target_hll_type tgtHllType,
30
+ const hll_mode mode, const bool startFullSize)
31
+ : lgConfigK(lgConfigK),
32
+ tgtHllType(tgtHllType),
33
+ mode(mode),
34
+ startFullSize(startFullSize)
35
+ {
36
+ }
37
+
38
+ template<typename A>
39
+ HllSketchImpl<A>::~HllSketchImpl() {
40
+ }
41
+
42
+ template<typename A>
43
+ target_hll_type HllSketchImpl<A>::extractTgtHllType(const uint8_t modeByte) {
44
+ switch ((modeByte >> 2) & 0x3) {
45
+ case 0:
46
+ return target_hll_type::HLL_4;
47
+ case 1:
48
+ return target_hll_type::HLL_6;
49
+ case 2:
50
+ return target_hll_type::HLL_8;
51
+ default:
52
+ throw std::invalid_argument("Invalid target HLL type");
53
+ }
54
+ }
55
+
56
+ template<typename A>
57
+ hll_mode HllSketchImpl<A>::extractCurMode(const uint8_t modeByte) {
58
+ switch (modeByte & 0x3) {
59
+ case 0:
60
+ return hll_mode::LIST;
61
+ case 1:
62
+ return hll_mode::SET;
63
+ case 2:
64
+ return hll_mode::HLL;
65
+ default:
66
+ throw std::invalid_argument("Invalid current sketch mode");
67
+ }
68
+ }
69
+
70
+ template<typename A>
71
+ uint8_t HllSketchImpl<A>::makeFlagsByte(const bool compact) const {
72
+ uint8_t flags(0);
73
+ flags |= (isEmpty() ? HllUtil<A>::EMPTY_FLAG_MASK : 0);
74
+ flags |= (compact ? HllUtil<A>::COMPACT_FLAG_MASK : 0);
75
+ flags |= (isOutOfOrderFlag() ? HllUtil<A>::OUT_OF_ORDER_FLAG_MASK : 0);
76
+ flags |= (startFullSize ? HllUtil<A>::FULL_SIZE_FLAG_MASK : 0);
77
+ return flags;
78
+ }
79
+
80
+ // lo2bits = curMode, next 2 bits = tgtHllType
81
+ // Dec Lo4Bits TgtHllType, CurMode
82
+ // 0 0000 HLL_4, LIST
83
+ // 1 0001 HLL_4, SET
84
+ // 2 0010 HLL_4, HLL
85
+ // 4 0100 HLL_6, LIST
86
+ // 5 0101 HLL_6, SET
87
+ // 6 0110 HLL_6, HLL
88
+ // 8 1000 HLL_8, LIST
89
+ // 9 1001 HLL_8, SET
90
+ // 10 1010 HLL_8, HLL
91
+ template<typename A>
92
+ uint8_t HllSketchImpl<A>::makeModeByte() const {
93
+ uint8_t byte = 0;
94
+
95
+ switch (mode) {
96
+ case LIST:
97
+ byte = 0;
98
+ break;
99
+ case SET:
100
+ byte = 1;
101
+ break;
102
+ case HLL:
103
+ byte = 2;
104
+ break;
105
+ }
106
+
107
+ switch (tgtHllType) {
108
+ case HLL_4:
109
+ byte |= (0 << 2); // for completeness
110
+ break;
111
+ case HLL_6:
112
+ byte |= (1 << 2);
113
+ break;
114
+ case HLL_8:
115
+ byte |= (2 << 2);
116
+ break;
117
+ }
118
+
119
+ return byte;
120
+ }
121
+
122
+ template<typename A>
123
+ HllSketchImpl<A>* HllSketchImpl<A>::reset() {
124
+ return HllSketchImplFactory<A>::reset(this, startFullSize);
125
+ }
126
+
127
+ template<typename A>
128
+ target_hll_type HllSketchImpl<A>::getTgtHllType() const {
129
+ return tgtHllType;
130
+ }
131
+
132
+ template<typename A>
133
+ int HllSketchImpl<A>::getLgConfigK() const {
134
+ return lgConfigK;
135
+ }
136
+
137
+ template<typename A>
138
+ hll_mode HllSketchImpl<A>::getCurMode() const {
139
+ return mode;
140
+ }
141
+
142
+ template<typename A>
143
+ bool HllSketchImpl<A>::isStartFullSize() const {
144
+ return startFullSize;
145
+ }
146
+
147
+ }
148
+
149
+ #endif // _HLLSKETCHIMPL_INTERNAL_HPP_
@@ -0,0 +1,85 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLLSKETCHIMPL_HPP_
21
+ #define _HLLSKETCHIMPL_HPP_
22
+
23
+ #include "HllUtil.hpp"
24
+ #include "hll.hpp" // for target_hll_type
25
+
26
+ #include <memory>
27
+
28
+ namespace datasketches {
29
+
30
+ template<typename A = std::allocator<char>>
31
+ class HllSketchImpl {
32
+ public:
33
+ HllSketchImpl(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
34
+ virtual ~HllSketchImpl();
35
+
36
+ virtual void serialize(std::ostream& os, bool compact) const = 0;
37
+ virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const = 0;
38
+
39
+ virtual HllSketchImpl* copy() const = 0;
40
+ virtual HllSketchImpl* copyAs(target_hll_type tgtHllType) const = 0;
41
+ HllSketchImpl<A>* reset();
42
+
43
+ virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0;
44
+
45
+ virtual HllSketchImpl* couponUpdate(int coupon) = 0;
46
+
47
+ hll_mode getCurMode() const;
48
+
49
+ virtual double getEstimate() const = 0;
50
+ virtual double getCompositeEstimate() const = 0;
51
+ virtual double getUpperBound(int numStdDev) const = 0;
52
+ virtual double getLowerBound(int numStdDev) const = 0;
53
+
54
+ inline int getLgConfigK() const;
55
+
56
+ virtual int getMemDataStart() const = 0;
57
+
58
+ virtual int getPreInts() const = 0;
59
+
60
+ target_hll_type getTgtHllType() const;
61
+
62
+ virtual int getUpdatableSerializationBytes() const = 0;
63
+ virtual int getCompactSerializationBytes() const = 0;
64
+
65
+ virtual bool isCompact() const = 0;
66
+ virtual bool isEmpty() const = 0;
67
+ virtual bool isOutOfOrderFlag() const = 0;
68
+ virtual void putOutOfOrderFlag(bool oooFlag) = 0;
69
+ bool isStartFullSize() const;
70
+
71
+ protected:
72
+ static target_hll_type extractTgtHllType(uint8_t modeByte);
73
+ static hll_mode extractCurMode(uint8_t modeByte);
74
+ uint8_t makeFlagsByte(bool compact) const;
75
+ uint8_t makeModeByte() const;
76
+
77
+ const int lgConfigK;
78
+ const target_hll_type tgtHllType;
79
+ const hll_mode mode;
80
+ const bool startFullSize;
81
+ };
82
+
83
+ }
84
+
85
+ #endif // _HLLSKETCHIMPL_HPP_
@@ -0,0 +1,170 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLLSKETCHIMPLFACTORY_HPP_
21
+ #define _HLLSKETCHIMPLFACTORY_HPP_
22
+
23
+ #include "HllUtil.hpp"
24
+ #include "HllSketchImpl.hpp"
25
+ #include "CouponList.hpp"
26
+ #include "CouponHashSet.hpp"
27
+ #include "HllArray.hpp"
28
+ #include "Hll4Array.hpp"
29
+ #include "Hll6Array.hpp"
30
+ #include "Hll8Array.hpp"
31
+
32
+ namespace datasketches {
33
+
34
+ template<typename A = std::allocator<char>>
35
+ class HllSketchImplFactory final {
36
+ public:
37
+ static HllSketchImpl<A>* deserialize(std::istream& os);
38
+ static HllSketchImpl<A>* deserialize(const void* bytes, size_t len);
39
+
40
+ static CouponHashSet<A>* promoteListToSet(const CouponList<A>& list);
41
+ static HllArray<A>* promoteListOrSetToHll(const CouponList<A>& list);
42
+ static HllArray<A>* newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize = false);
43
+
44
+ // resets the input impl, deleting the input pointer and returning a new pointer
45
+ static HllSketchImpl<A>* reset(HllSketchImpl<A>* impl, bool startFullSize);
46
+
47
+ static Hll4Array<A>* convertToHll4(const HllArray<A>& srcHllArr);
48
+ static Hll6Array<A>* convertToHll6(const HllArray<A>& srcHllArr);
49
+ static Hll8Array<A>* convertToHll8(const HllArray<A>& srcHllArr);
50
+ };
51
+
52
+ template<typename A>
53
+ CouponHashSet<A>* HllSketchImplFactory<A>::promoteListToSet(const CouponList<A>& list) {
54
+ typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc;
55
+ CouponHashSet<A>* chSet = new (chsAlloc().allocate(1)) CouponHashSet<A>(list.getLgConfigK(), list.getTgtHllType());
56
+ for (auto coupon: list) {
57
+ chSet->couponUpdate(coupon);
58
+ }
59
+ return chSet;
60
+ }
61
+
62
+ template<typename A>
63
+ HllArray<A>* HllSketchImplFactory<A>::promoteListOrSetToHll(const CouponList<A>& src) {
64
+ HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(src.getLgConfigK(), src.getTgtHllType());
65
+ tgtHllArr->putKxQ0(1 << src.getLgConfigK());
66
+ for (auto coupon: src) {
67
+ tgtHllArr->couponUpdate(coupon);
68
+ }
69
+ tgtHllArr->putHipAccum(src.getEstimate());
70
+ tgtHllArr->putOutOfOrderFlag(false);
71
+ return tgtHllArr;
72
+ }
73
+
74
+ template<typename A>
75
+ HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(std::istream& is) {
76
+ // we'll hand off the sketch based on PreInts so we don't need
77
+ // to move the stream pointer back and forth -- perhaps somewhat fragile?
78
+ const int preInts = is.peek();
79
+ if (preInts == HllUtil<A>::HLL_PREINTS) {
80
+ return HllArray<A>::newHll(is);
81
+ } else if (preInts == HllUtil<A>::HASH_SET_PREINTS) {
82
+ return CouponHashSet<A>::newSet(is);
83
+ } else if (preInts == HllUtil<A>::LIST_PREINTS) {
84
+ return CouponList<A>::newList(is);
85
+ } else {
86
+ throw std::invalid_argument("Attempt to deserialize unknown object type");
87
+ }
88
+ }
89
+
90
+ template<typename A>
91
+ HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(const void* bytes, size_t len) {
92
+ // read current mode directly
93
+ const int preInts = static_cast<const uint8_t*>(bytes)[0];
94
+ if (preInts == HllUtil<A>::HLL_PREINTS) {
95
+ return HllArray<A>::newHll(bytes, len);
96
+ } else if (preInts == HllUtil<A>::HASH_SET_PREINTS) {
97
+ return CouponHashSet<A>::newSet(bytes, len);
98
+ } else if (preInts == HllUtil<A>::LIST_PREINTS) {
99
+ return CouponList<A>::newList(bytes, len);
100
+ } else {
101
+ throw std::invalid_argument("Attempt to deserialize unknown object type");
102
+ }
103
+ }
104
+
105
+ template<typename A>
106
+ HllArray<A>* HllSketchImplFactory<A>::newHll(int lgConfigK, target_hll_type tgtHllType, bool startFullSize) {
107
+ switch (tgtHllType) {
108
+ case HLL_8:
109
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
110
+ return new (hll8Alloc().allocate(1)) Hll8Array<A>(lgConfigK, startFullSize);
111
+ case HLL_6:
112
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
113
+ return new (hll6Alloc().allocate(1)) Hll6Array<A>(lgConfigK, startFullSize);
114
+ case HLL_4:
115
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
116
+ return new (hll4Alloc().allocate(1)) Hll4Array<A>(lgConfigK, startFullSize);
117
+ }
118
+ throw std::logic_error("Invalid target_hll_type");
119
+ }
120
+
121
+ template<typename A>
122
+ HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool startFullSize) {
123
+ if (startFullSize) {
124
+ HllArray<A>* hll = newHll(impl->getLgConfigK(), impl->getTgtHllType(), startFullSize);
125
+ impl->get_deleter()(impl);
126
+ return hll;
127
+ } else {
128
+ typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
129
+ CouponList<A>* cl = new (clAlloc().allocate(1)) CouponList<A>(impl->getLgConfigK(), impl->getTgtHllType(), hll_mode::LIST);
130
+ impl->get_deleter()(impl);
131
+ return cl;
132
+ }
133
+ }
134
+
135
+ template<typename A>
136
+ Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllArr) {
137
+ const int lgConfigK = srcHllArr.getLgConfigK();
138
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
139
+ Hll4Array<A>* hll4Array = new (hll4Alloc().allocate(1)) Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize());
140
+ hll4Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
141
+ hll4Array->mergeHll(srcHllArr);
142
+ hll4Array->putHipAccum(srcHllArr.getHipAccum());
143
+ return hll4Array;
144
+ }
145
+
146
+ template<typename A>
147
+ Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllArr) {
148
+ const int lgConfigK = srcHllArr.getLgConfigK();
149
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>> hll6Alloc;
150
+ Hll6Array<A>* hll6Array = new (hll6Alloc().allocate(1)) Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize());
151
+ hll6Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
152
+ hll6Array->mergeHll(srcHllArr);
153
+ hll6Array->putHipAccum(srcHllArr.getHipAccum());
154
+ return hll6Array;
155
+ }
156
+
157
+ template<typename A>
158
+ Hll8Array<A>* HllSketchImplFactory<A>::convertToHll8(const HllArray<A>& srcHllArr) {
159
+ const int lgConfigK = srcHllArr.getLgConfigK();
160
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
161
+ Hll8Array<A>* hll8Array = new (hll8Alloc().allocate(1)) Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize());
162
+ hll8Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
163
+ hll8Array->mergeHll(srcHllArr);
164
+ hll8Array->putHipAccum(srcHllArr.getHipAccum());
165
+ return hll8Array;
166
+ }
167
+
168
+ }
169
+
170
+ #endif /* _HLLSKETCHIMPLFACTORY_HPP_ */
@@ -0,0 +1,287 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HLLUNION_INTERNAL_HPP_
21
+ #define _HLLUNION_INTERNAL_HPP_
22
+
23
+ #include "hll.hpp"
24
+
25
+ #include "HllSketchImpl.hpp"
26
+ #include "HllArray.hpp"
27
+ #include "HllUtil.hpp"
28
+
29
+ #include <stdexcept>
30
+ #include <string>
31
+
32
+ namespace datasketches {
33
+
34
+ template<typename A>
35
+ hll_union_alloc<A>::hll_union_alloc(const int lg_max_k):
36
+ lg_max_k(HllUtil<A>::checkLgK(lg_max_k)),
37
+ gadget(lg_max_k, target_hll_type::HLL_8)
38
+ {}
39
+
40
+ template<typename A>
41
+ hll_sketch_alloc<A> hll_union_alloc<A>::get_result(target_hll_type target_type) const {
42
+ return hll_sketch_alloc<A>(gadget, target_type);
43
+ }
44
+
45
+ template<typename A>
46
+ void hll_union_alloc<A>::update(const hll_sketch_alloc<A>& sketch) {
47
+ if (sketch.is_empty()) return;
48
+ union_impl(sketch, lg_max_k);
49
+ }
50
+
51
+ template<typename A>
52
+ void hll_union_alloc<A>::update(hll_sketch_alloc<A>&& sketch) {
53
+ if (sketch.is_empty()) return;
54
+ if (gadget.is_empty() && sketch.get_target_type() == HLL_8 && sketch.get_lg_config_k() <= lg_max_k) {
55
+ if (sketch.get_current_mode() == HLL || sketch.get_lg_config_k() == lg_max_k) {
56
+ gadget = std::move(sketch);
57
+ }
58
+ }
59
+ union_impl(sketch, lg_max_k);
60
+ }
61
+
62
+ template<typename A>
63
+ void hll_union_alloc<A>::update(const std::string& datum) {
64
+ gadget.update(datum);
65
+ }
66
+
67
+ template<typename A>
68
+ void hll_union_alloc<A>::update(const uint64_t datum) {
69
+ gadget.update(datum);
70
+ }
71
+
72
+ template<typename A>
73
+ void hll_union_alloc<A>::update(const uint32_t datum) {
74
+ gadget.update(datum);
75
+ }
76
+
77
+ template<typename A>
78
+ void hll_union_alloc<A>::update(const uint16_t datum) {
79
+ gadget.update(datum);
80
+ }
81
+
82
+ template<typename A>
83
+ void hll_union_alloc<A>::update(const uint8_t datum) {
84
+ gadget.update(datum);
85
+ }
86
+
87
+ template<typename A>
88
+ void hll_union_alloc<A>::update(const int64_t datum) {
89
+ gadget.update(datum);
90
+ }
91
+
92
+ template<typename A>
93
+ void hll_union_alloc<A>::update(const int32_t datum) {
94
+ gadget.update(datum);
95
+ }
96
+
97
+ template<typename A>
98
+ void hll_union_alloc<A>::update(const int16_t datum) {
99
+ gadget.update(datum);
100
+ }
101
+
102
+ template<typename A>
103
+ void hll_union_alloc<A>::update(const int8_t datum) {
104
+ gadget.update(datum);
105
+ }
106
+
107
+ template<typename A>
108
+ void hll_union_alloc<A>::update(const double datum) {
109
+ gadget.update(datum);
110
+ }
111
+
112
+ template<typename A>
113
+ void hll_union_alloc<A>::update(const float datum) {
114
+ gadget.update(datum);
115
+ }
116
+
117
+ template<typename A>
118
+ void hll_union_alloc<A>::update(const void* data, const size_t length_bytes) {
119
+ gadget.update(data, length_bytes);
120
+ }
121
+
122
+ template<typename A>
123
+ void hll_union_alloc<A>::coupon_update(const int coupon) {
124
+ if (coupon == HllUtil<A>::EMPTY) { return; }
125
+ HllSketchImpl<A>* result = gadget.sketch_impl->coupon_update(coupon);
126
+ if (result != gadget.sketch_impl) {
127
+ if (gadget.sketch_impl != nullptr) { gadget.sketch_impl->get_deleter()(gadget.sketch_impl); }
128
+ gadget.sketch_impl = result;
129
+ }
130
+ }
131
+
132
+ template<typename A>
133
+ double hll_union_alloc<A>::get_estimate() const {
134
+ return gadget.get_estimate();
135
+ }
136
+
137
+ template<typename A>
138
+ double hll_union_alloc<A>::get_composite_estimate() const {
139
+ return gadget.get_composite_estimate();
140
+ }
141
+
142
+ template<typename A>
143
+ double hll_union_alloc<A>::get_lower_bound(const int num_std_dev) const {
144
+ return gadget.get_lower_bound(num_std_dev);
145
+ }
146
+
147
+ template<typename A>
148
+ double hll_union_alloc<A>::get_upper_bound(const int num_std_dev) const {
149
+ return gadget.get_upper_bound(num_std_dev);
150
+ }
151
+
152
+ template<typename A>
153
+ int hll_union_alloc<A>::get_compact_serialization_bytes() const {
154
+ return gadget.get_compact_serialization_bytes();
155
+ }
156
+
157
+ template<typename A>
158
+ int hll_union_alloc<A>::get_updatable_serialization_bytes() const {
159
+ return gadget.get_updatable_serialization_bytes();
160
+ }
161
+
162
+ template<typename A>
163
+ int hll_union_alloc<A>::get_lg_config_k() const {
164
+ return gadget.get_lg_config_k();
165
+ }
166
+
167
+ template<typename A>
168
+ void hll_union_alloc<A>::reset() {
169
+ gadget.reset();
170
+ }
171
+
172
+ template<typename A>
173
+ bool hll_union_alloc<A>::is_compact() const {
174
+ return gadget.is_compact();
175
+ }
176
+
177
+ template<typename A>
178
+ bool hll_union_alloc<A>::is_empty() const {
179
+ return gadget.is_empty();
180
+ }
181
+
182
+ template<typename A>
183
+ bool hll_union_alloc<A>::is_out_of_order_flag() const {
184
+ return gadget.is_out_of_order_flag();
185
+ }
186
+
187
+ template<typename A>
188
+ hll_mode hll_union_alloc<A>::get_current_mode() const {
189
+ return gadget.get_current_mode();
190
+ }
191
+
192
+ template<typename A>
193
+ bool hll_union_alloc<A>::is_estimation_mode() const {
194
+ return gadget.is_estimation_mode();
195
+ }
196
+
197
+ template<typename A>
198
+ int hll_union_alloc<A>::get_serialization_version() const {
199
+ return HllUtil<A>::SER_VER;
200
+ }
201
+
202
+ template<typename A>
203
+ target_hll_type hll_union_alloc<A>::get_target_type() const {
204
+ return target_hll_type::HLL_8;
205
+ }
206
+
207
+ template<typename A>
208
+ int hll_union_alloc<A>::get_max_serialization_bytes(const int lg_k) {
209
+ return hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(lg_k, target_hll_type::HLL_8);
210
+ }
211
+
212
+ template<typename A>
213
+ double hll_union_alloc<A>::get_rel_err(const bool upper_bound, const bool unioned,
214
+ const int lg_config_k, const int num_std_dev) {
215
+ return HllUtil<A>::getRelErr(upper_bound, unioned, lg_config_k, num_std_dev);
216
+ }
217
+
218
+ template<typename A>
219
+ HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(const HllSketchImpl<A>* src_impl, const int tgt_lg_k) {
220
+ if (src_impl->getCurMode() != HLL) {
221
+ throw std::logic_error("Attempt to downsample non-HLL sketch");
222
+ }
223
+ const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
224
+ const int src_lg_k = src->getLgConfigK();
225
+ if (src_lg_k <= tgt_lg_k) {
226
+ return src->copyAs(HLL_8);
227
+ }
228
+ typedef typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>> hll8Alloc;
229
+ Hll8Array<A>* tgtHllArr = new (hll8Alloc().allocate(1)) Hll8Array<A>(tgt_lg_k, false);
230
+ tgtHllArr->mergeHll(*src);
231
+ //both of these are required for isomorphism
232
+ tgtHllArr->putHipAccum(src->getHipAccum());
233
+ tgtHllArr->putOutOfOrderFlag(src->isOutOfOrderFlag());
234
+ return tgtHllArr;
235
+ }
236
+
237
+ template<typename A>
238
+ inline HllSketchImpl<A>* hll_union_alloc<A>::leak_free_coupon_update(HllSketchImpl<A>* impl, const int coupon) {
239
+ HllSketchImpl<A>* result = impl->couponUpdate(coupon);
240
+ if (result != impl) {
241
+ impl->get_deleter()(impl);
242
+ }
243
+ return result;
244
+ }
245
+
246
+ template<typename A>
247
+ void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch, const int lg_max_k) {
248
+ const HllSketchImpl<A>* src_impl = sketch.sketch_impl; //default
249
+ HllSketchImpl<A>* dst_impl = gadget.sketch_impl; //default
250
+ if (src_impl->getCurMode() == LIST || src_impl->getCurMode() == SET) {
251
+ if (dst_impl->isEmpty() && src_impl->getLgConfigK() == dst_impl->getLgConfigK()) {
252
+ dst_impl = src_impl->copyAs(HLL_8);
253
+ gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget to be replaced
254
+ } else {
255
+ const CouponList<A>* src = static_cast<const CouponList<A>*>(src_impl);
256
+ for (auto coupon: *src) {
257
+ dst_impl = leak_free_coupon_update(dst_impl, coupon); //assignment required
258
+ }
259
+ }
260
+ } else if (!dst_impl->isEmpty()) { // src is HLL
261
+ if (dst_impl->getCurMode() == LIST || dst_impl->getCurMode() == SET) {
262
+ // swap so that src is LIST or SET, tgt is HLL
263
+ // use lg_max_k because LIST has effective K of 2^26
264
+ const CouponList<A>* src = static_cast<const CouponList<A>*>(dst_impl);
265
+ dst_impl = copy_or_downsample(src_impl, lg_max_k);
266
+ static_cast<Hll8Array<A>*>(dst_impl)->mergeList(*src);
267
+ gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget to be replaced
268
+ } else { // gadget is HLL
269
+ if (src_impl->getLgConfigK() < dst_impl->getLgConfigK()) {
270
+ dst_impl = copy_or_downsample(dst_impl, sketch.get_lg_config_k());
271
+ gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget to be replaced
272
+ }
273
+ const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
274
+ static_cast<Hll8Array<A>*>(dst_impl)->mergeHll(*src);
275
+ dst_impl->putOutOfOrderFlag(true);
276
+ static_cast<Hll8Array<A>*>(dst_impl)->putHipAccum(0);
277
+ }
278
+ } else { // src is HLL, gadget is empty
279
+ dst_impl = copy_or_downsample(src_impl, lg_max_k);
280
+ gadget.sketch_impl->get_deleter()(gadget.sketch_impl); // gadget to be replaced
281
+ }
282
+ gadget.sketch_impl = dst_impl; // gadget replaced
283
+ }
284
+
285
+ }
286
+
287
+ #endif // _HLLUNION_INTERNAL_HPP_