datasketches 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,939 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef THETA_SKETCH_IMPL_HPP_
21
+ #define THETA_SKETCH_IMPL_HPP_
22
+
23
#include <algorithm>
#include <cmath>
#include <functional>
#include <istream>
#include <memory>
#include <ostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
30
+
31
+ #include "MurmurHash3.h"
32
+ #include "serde.hpp"
33
+ #include "binomial_bounds.hpp"
34
+ #include "memory_operations.hpp"
35
+
36
+ namespace datasketches {
37
+
38
+ /*
39
+ * author Alexander Saydakov
40
+ * author Lee Rhodes
41
+ * author Kevin Lang
42
+ */
43
+
44
+ template<typename A>
45
+ theta_sketch_alloc<A>::theta_sketch_alloc(bool is_empty, uint64_t theta):
46
+ is_empty_(is_empty), theta_(theta)
47
+ {}
48
+
49
+ template<typename A>
50
+ bool theta_sketch_alloc<A>::is_empty() const {
51
+ return is_empty_;
52
+ }
53
+
54
+ template<typename A>
55
+ double theta_sketch_alloc<A>::get_estimate() const {
56
+ return get_num_retained() / get_theta();
57
+ }
58
+
59
+ template<typename A>
60
+ double theta_sketch_alloc<A>::get_lower_bound(uint8_t num_std_devs) const {
61
+ if (!is_estimation_mode()) return get_num_retained();
62
+ return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
63
+ }
64
+
65
+ template<typename A>
66
+ double theta_sketch_alloc<A>::get_upper_bound(uint8_t num_std_devs) const {
67
+ if (!is_estimation_mode()) return get_num_retained();
68
+ return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
69
+ }
70
+
71
+ template<typename A>
72
+ bool theta_sketch_alloc<A>::is_estimation_mode() const {
73
+ return theta_ < MAX_THETA && !is_empty_;
74
+ }
75
+
76
+ template<typename A>
77
+ double theta_sketch_alloc<A>::get_theta() const {
78
+ return (double) theta_ / MAX_THETA;
79
+ }
80
+
81
+ template<typename A>
82
+ uint64_t theta_sketch_alloc<A>::get_theta64() const {
83
+ return theta_;
84
+ }
85
+
86
+ template<typename A>
87
+ typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
88
+ uint8_t preamble_longs;
89
+ is.read((char*)&preamble_longs, sizeof(preamble_longs));
90
+ uint8_t serial_version;
91
+ is.read((char*)&serial_version, sizeof(serial_version));
92
+ uint8_t type;
93
+ is.read((char*)&type, sizeof(type));
94
+ uint8_t lg_nom_size;
95
+ is.read((char*)&lg_nom_size, sizeof(lg_nom_size));
96
+ uint8_t lg_cur_size;
97
+ is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
98
+ uint8_t flags_byte;
99
+ is.read((char*)&flags_byte, sizeof(flags_byte));
100
+ uint16_t seed_hash;
101
+ is.read((char*)&seed_hash, sizeof(seed_hash));
102
+
103
+ check_serial_version(serial_version, SERIAL_VERSION);
104
+
105
+ if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
106
+ check_seed_hash(seed_hash, get_seed_hash(seed));
107
+ typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
108
+ typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
109
+ return unique_ptr(
110
+ static_cast<theta_sketch_alloc<A>*>(new (AU().allocate(1)) update_theta_sketch_alloc<A>(update_theta_sketch_alloc<A>::internal_deserialize(is, rf, lg_cur_size, lg_nom_size, flags_byte, seed))),
111
+ [](theta_sketch_alloc<A>* ptr) {
112
+ ptr->~theta_sketch_alloc();
113
+ AU().deallocate(static_cast<update_theta_sketch_alloc<A>*>(ptr), 1);
114
+ }
115
+ );
116
+ } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
117
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
118
+ if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
119
+ typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
120
+ return unique_ptr(
121
+ static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(compact_theta_sketch_alloc<A>::internal_deserialize(is, preamble_longs, flags_byte, seed_hash))),
122
+ [](theta_sketch_alloc<A>* ptr) {
123
+ ptr->~theta_sketch_alloc();
124
+ AC().deallocate(static_cast<compact_theta_sketch_alloc<A>*>(ptr), 1);
125
+ }
126
+ );
127
+ }
128
+ throw std::invalid_argument("unsupported sketch type " + std::to_string((int) type));
129
+ }
130
+
131
+ template<typename A>
132
+ typename theta_sketch_alloc<A>::unique_ptr theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
133
+ ensure_minimum_memory(size, static_cast<size_t>(8));
134
+ const char* ptr = static_cast<const char*>(bytes);
135
+ uint8_t preamble_longs;
136
+ ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
137
+ uint8_t serial_version;
138
+ ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
139
+ uint8_t type;
140
+ ptr += copy_from_mem(ptr, &type, sizeof(type));
141
+ uint8_t lg_nom_size;
142
+ ptr += copy_from_mem(ptr, &lg_nom_size, sizeof(lg_nom_size));
143
+ uint8_t lg_cur_size;
144
+ ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(lg_cur_size));
145
+ uint8_t flags_byte;
146
+ ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
147
+ uint16_t seed_hash;
148
+ ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
149
+
150
+ check_serial_version(serial_version, SERIAL_VERSION);
151
+
152
+ if (type == update_theta_sketch_alloc<A>::SKETCH_TYPE) {
153
+ check_seed_hash(seed_hash, get_seed_hash(seed));
154
+ typename update_theta_sketch_alloc<A>::resize_factor rf = static_cast<typename update_theta_sketch_alloc<A>::resize_factor>(preamble_longs >> 6);
155
+ typedef typename std::allocator_traits<A>::template rebind_alloc<update_theta_sketch_alloc<A>> AU;
156
+ return unique_ptr(
157
+ static_cast<theta_sketch_alloc<A>*>(new (AU().allocate(1)) update_theta_sketch_alloc<A>(
158
+ update_theta_sketch_alloc<A>::internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), rf, lg_cur_size, lg_nom_size, flags_byte, seed))
159
+ ),
160
+ [](theta_sketch_alloc<A>* ptr) {
161
+ ptr->~theta_sketch_alloc();
162
+ AU().deallocate(static_cast<update_theta_sketch_alloc<A>*>(ptr), 1);
163
+ }
164
+ );
165
+ } else if (type == compact_theta_sketch_alloc<A>::SKETCH_TYPE) {
166
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
167
+ if (!is_empty) check_seed_hash(seed_hash, get_seed_hash(seed));
168
+ typedef typename std::allocator_traits<A>::template rebind_alloc<compact_theta_sketch_alloc<A>> AC;
169
+ return unique_ptr(
170
+ static_cast<theta_sketch_alloc<A>*>(new (AC().allocate(1)) compact_theta_sketch_alloc<A>(
171
+ compact_theta_sketch_alloc<A>::internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash))
172
+ ),
173
+ [](theta_sketch_alloc<A>* ptr) {
174
+ ptr->~theta_sketch_alloc();
175
+ AC().deallocate(static_cast<compact_theta_sketch_alloc<A>*>(ptr), 1);
176
+ }
177
+ );
178
+ }
179
+ throw std::invalid_argument("unsupported sketch type " + std::to_string((int) type));
180
+ }
181
+
182
+ template<typename A>
183
+ uint16_t theta_sketch_alloc<A>::get_seed_hash(uint64_t seed) {
184
+ HashState hashes;
185
+ MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
186
+ return hashes.h1;
187
+ }
188
+
189
+ template<typename A>
190
+ void theta_sketch_alloc<A>::check_sketch_type(uint8_t actual, uint8_t expected) {
191
+ if (actual != expected) {
192
+ throw std::invalid_argument("Sketch type mismatch: expected " + std::to_string((int)expected) + ", actual " + std::to_string((int)actual));
193
+ }
194
+ }
195
+
196
+ template<typename A>
197
+ void theta_sketch_alloc<A>::check_serial_version(uint8_t actual, uint8_t expected) {
198
+ if (actual != expected) {
199
+ throw std::invalid_argument("Sketch serial version mismatch: expected " + std::to_string((int)expected) + ", actual " + std::to_string((int)actual));
200
+ }
201
+ }
202
+
203
+ template<typename A>
204
+ void theta_sketch_alloc<A>::check_seed_hash(uint16_t actual, uint16_t expected) {
205
+ if (actual != expected) {
206
+ throw std::invalid_argument("Sketch seed hash mismatch: expected " + std::to_string(expected) + ", actual " + std::to_string(actual));
207
+ }
208
+ }
209
+
210
+ // update sketch
211
+
212
+ template<typename A>
213
+ update_theta_sketch_alloc<A>::update_theta_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t seed):
214
+ theta_sketch_alloc<A>(true, theta_sketch_alloc<A>::MAX_THETA),
215
+ lg_cur_size_(lg_cur_size),
216
+ lg_nom_size_(lg_nom_size),
217
+ keys_(1 << lg_cur_size_, 0),
218
+ num_keys_(0),
219
+ rf_(rf),
220
+ p_(p),
221
+ seed_(seed),
222
+ capacity_(get_capacity(lg_cur_size, lg_nom_size))
223
+ {
224
+ if (p < 1) this->theta_ *= p;
225
+ }
226
+
227
+ template<typename A>
228
+ update_theta_sketch_alloc<A>::update_theta_sketch_alloc(bool is_empty, uint64_t theta, uint8_t lg_cur_size, uint8_t lg_nom_size, vector_u64<A>&& keys, uint32_t num_keys, resize_factor rf, float p, uint64_t seed):
229
+ theta_sketch_alloc<A>(is_empty, theta),
230
+ lg_cur_size_(lg_cur_size),
231
+ lg_nom_size_(lg_nom_size),
232
+ keys_(std::move(keys)),
233
+ num_keys_(num_keys),
234
+ rf_(rf),
235
+ p_(p),
236
+ seed_(seed),
237
+ capacity_(get_capacity(lg_cur_size, lg_nom_size))
238
+ {}
239
+
240
+ template<typename A>
241
+ uint32_t update_theta_sketch_alloc<A>::get_num_retained() const {
242
+ return num_keys_;
243
+ }
244
+
245
+ template<typename A>
246
+ uint16_t update_theta_sketch_alloc<A>::get_seed_hash() const {
247
+ return theta_sketch_alloc<A>::get_seed_hash(seed_);
248
+ }
249
+
250
+ template<typename A>
251
+ bool update_theta_sketch_alloc<A>::is_ordered() const {
252
+ return false;
253
+ }
254
+
255
+ template<typename A>
256
+ string<A> update_theta_sketch_alloc<A>::to_string(bool print_items) const {
257
+ std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
258
+ os << "### Update Theta sketch summary:" << std::endl;
259
+ os << " lg nominal size : " << (int) lg_nom_size_ << std::endl;
260
+ os << " lg current size : " << (int) lg_cur_size_ << std::endl;
261
+ os << " num retained keys : " << num_keys_ << std::endl;
262
+ os << " resize factor : " << (1 << rf_) << std::endl;
263
+ os << " sampling probability : " << p_ << std::endl;
264
+ os << " seed hash : " << this->get_seed_hash() << std::endl;
265
+ os << " empty? : " << (this->is_empty() ? "true" : "false") << std::endl;
266
+ os << " ordered? : " << (this->is_ordered() ? "true" : "false") << std::endl;
267
+ os << " estimation mode? : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
268
+ os << " theta (fraction) : " << this->get_theta() << std::endl;
269
+ os << " theta (raw 64-bit) : " << this->theta_ << std::endl;
270
+ os << " estimate : " << this->get_estimate() << std::endl;
271
+ os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
272
+ os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
273
+ os << "### End sketch summary" << std::endl;
274
+ if (print_items) {
275
+ os << "### Retained keys" << std::endl;
276
+ for (auto key: *this) os << " " << key << std::endl;
277
+ os << "### End retained keys" << std::endl;
278
+ }
279
+ return os.str();
280
+ }
281
+
282
+ template<typename A>
283
+ void update_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
284
+ const uint8_t preamble_longs_and_rf = 3 | (rf_ << 6);
285
+ os.write((char*)&preamble_longs_and_rf, sizeof(preamble_longs_and_rf));
286
+ const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
287
+ os.write((char*)&serial_version, sizeof(serial_version));
288
+ const uint8_t type = SKETCH_TYPE;
289
+ os.write((char*)&type, sizeof(type));
290
+ os.write((char*)&lg_nom_size_, sizeof(lg_nom_size_));
291
+ os.write((char*)&lg_cur_size_, sizeof(lg_cur_size_));
292
+ const uint8_t flags_byte(
293
+ (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0)
294
+ );
295
+ os.write((char*)&flags_byte, sizeof(flags_byte));
296
+ const uint16_t seed_hash = get_seed_hash();
297
+ os.write((char*)&seed_hash, sizeof(seed_hash));
298
+ os.write((char*)&num_keys_, sizeof(num_keys_));
299
+ os.write((char*)&p_, sizeof(p_));
300
+ os.write((char*)&(this->theta_), sizeof(uint64_t));
301
+ os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
302
+ }
303
+
304
+ template<typename A>
305
+ vector_u8<A> update_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
306
+ const uint8_t preamble_longs = 3;
307
+ const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size();
308
+ vector_u8<A> bytes(size);
309
+ uint8_t* ptr = bytes.data() + header_size_bytes;
310
+
311
+ const uint8_t preamble_longs_and_rf = preamble_longs | (rf_ << 6);
312
+ ptr += copy_to_mem(&preamble_longs_and_rf, ptr, sizeof(preamble_longs_and_rf));
313
+ const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
314
+ ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
315
+ const uint8_t type = SKETCH_TYPE;
316
+ ptr += copy_to_mem(&type, ptr, sizeof(type));
317
+ ptr += copy_to_mem(&lg_nom_size_, ptr, sizeof(lg_nom_size_));
318
+ ptr += copy_to_mem(&lg_cur_size_, ptr, sizeof(lg_cur_size_));
319
+ const uint8_t flags_byte(
320
+ (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0)
321
+ );
322
+ ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
323
+ const uint16_t seed_hash = get_seed_hash();
324
+ ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
325
+ ptr += copy_to_mem(&num_keys_, ptr, sizeof(num_keys_));
326
+ ptr += copy_to_mem(&p_, ptr, sizeof(p_));
327
+ ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
328
+ ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
329
+
330
+ return bytes;
331
+ }
332
+
333
+ template<typename A>
334
+ update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
335
+ uint8_t preamble_longs;
336
+ is.read((char*)&preamble_longs, sizeof(preamble_longs));
337
+ resize_factor rf = static_cast<resize_factor>(preamble_longs >> 6);
338
+ preamble_longs &= 0x3f; // remove resize factor
339
+ uint8_t serial_version;
340
+ is.read((char*)&serial_version, sizeof(serial_version));
341
+ uint8_t type;
342
+ is.read((char*)&type, sizeof(type));
343
+ uint8_t lg_nom_size;
344
+ is.read((char*)&lg_nom_size, sizeof(lg_nom_size));
345
+ uint8_t lg_cur_size;
346
+ is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
347
+ uint8_t flags_byte;
348
+ is.read((char*)&flags_byte, sizeof(flags_byte));
349
+ uint16_t seed_hash;
350
+ is.read((char*)&seed_hash, sizeof(seed_hash));
351
+ theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
352
+ theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
353
+ theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
354
+ return internal_deserialize(is, rf, lg_cur_size, lg_nom_size, flags_byte, seed);
355
+ }
356
+
357
+ template<typename A>
358
+ update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed) {
359
+ uint32_t num_keys;
360
+ is.read((char*)&num_keys, sizeof(num_keys));
361
+ float p;
362
+ is.read((char*)&p, sizeof(p));
363
+ uint64_t theta;
364
+ is.read((char*)&theta, sizeof(theta));
365
+ vector_u64<A> keys(1 << lg_cur_size);
366
+ is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
367
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
368
+ if (!is.good()) throw std::runtime_error("error reading from std::istream");
369
+ return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed);
370
+ }
371
+
372
+ template<typename A>
373
+ update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
374
+ ensure_minimum_memory(size, 8);
375
+ const char* ptr = static_cast<const char*>(bytes);
376
+ uint8_t preamble_longs;
377
+ ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
378
+ resize_factor rf = static_cast<resize_factor>(preamble_longs >> 6);
379
+ preamble_longs &= 0x3f; // remove resize factor
380
+ uint8_t serial_version;
381
+ ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
382
+ uint8_t type;
383
+ ptr += copy_from_mem(ptr, &type, sizeof(type));
384
+ uint8_t lg_nom_size;
385
+ ptr += copy_from_mem(ptr, &lg_nom_size, sizeof(lg_nom_size));
386
+ uint8_t lg_cur_size;
387
+ ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(lg_cur_size));
388
+ uint8_t flags_byte;
389
+ ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
390
+ uint16_t seed_hash;
391
+ ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
392
+ theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
393
+ theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
394
+ theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
395
+ return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), rf, lg_cur_size, lg_nom_size, flags_byte, seed);
396
+ }
397
+
398
+ template<typename A>
399
+ update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::internal_deserialize(const void* bytes, size_t size, resize_factor rf, uint8_t lg_cur_size, uint8_t lg_nom_size, uint8_t flags_byte, uint64_t seed) {
400
+ const uint32_t table_size = 1 << lg_cur_size;
401
+ ensure_minimum_memory(size, 16 + sizeof(uint64_t) * table_size);
402
+ const char* ptr = static_cast<const char*>(bytes);
403
+ uint32_t num_keys;
404
+ ptr += copy_from_mem(ptr, &num_keys, sizeof(num_keys));
405
+ float p;
406
+ ptr += copy_from_mem(ptr, &p, sizeof(p));
407
+ uint64_t theta;
408
+ ptr += copy_from_mem(ptr, &theta, sizeof(theta));
409
+ vector_u64<A> keys(table_size);
410
+ ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * table_size);
411
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
412
+ return update_theta_sketch_alloc<A>(is_empty, theta, lg_cur_size, lg_nom_size, std::move(keys), num_keys, rf, p, seed);
413
+ }
414
+
415
+ template<typename A>
416
+ void update_theta_sketch_alloc<A>::update(const std::string& value) {
417
+ if (value.empty()) return;
418
+ update(value.c_str(), value.length());
419
+ }
420
+
421
+ template<typename A>
422
+ void update_theta_sketch_alloc<A>::update(uint64_t value) {
423
+ update(&value, sizeof(value));
424
+ }
425
+
426
+ template<typename A>
427
+ void update_theta_sketch_alloc<A>::update(int64_t value) {
428
+ update(&value, sizeof(value));
429
+ }
430
+
431
+ template<typename A>
432
+ void update_theta_sketch_alloc<A>::update(uint32_t value) {
433
+ update(static_cast<int32_t>(value));
434
+ }
435
+
436
+ template<typename A>
437
+ void update_theta_sketch_alloc<A>::update(int32_t value) {
438
+ update(static_cast<int64_t>(value));
439
+ }
440
+
441
+ template<typename A>
442
+ void update_theta_sketch_alloc<A>::update(uint16_t value) {
443
+ update(static_cast<int16_t>(value));
444
+ }
445
+
446
+ template<typename A>
447
+ void update_theta_sketch_alloc<A>::update(int16_t value) {
448
+ update(static_cast<int64_t>(value));
449
+ }
450
+
451
+ template<typename A>
452
+ void update_theta_sketch_alloc<A>::update(uint8_t value) {
453
+ update(static_cast<int8_t>(value));
454
+ }
455
+
456
+ template<typename A>
457
+ void update_theta_sketch_alloc<A>::update(int8_t value) {
458
+ update(static_cast<int64_t>(value));
459
+ }
460
+
461
+ template<typename A>
462
+ void update_theta_sketch_alloc<A>::update(double value) {
463
+ union {
464
+ int64_t long_value;
465
+ double double_value;
466
+ } long_double_union;
467
+
468
+ if (value == 0.0) {
469
+ long_double_union.double_value = 0.0; // canonicalize -0.0 to 0.0
470
+ } else if (std::isnan(value)) {
471
+ long_double_union.long_value = 0x7ff8000000000000L; // canonicalize NaN using value from Java's Double.doubleToLongBits()
472
+ } else {
473
+ long_double_union.double_value = value;
474
+ }
475
+ update(&long_double_union, sizeof(long_double_union));
476
+ }
477
+
478
+ template<typename A>
479
+ void update_theta_sketch_alloc<A>::update(float value) {
480
+ update(static_cast<double>(value));
481
+ }
482
+
483
+ template<typename A>
484
+ void update_theta_sketch_alloc<A>::update(const void* data, unsigned length) {
485
+ HashState hashes;
486
+ MurmurHash3_x64_128(data, length, seed_, hashes);
487
+ const uint64_t hash = hashes.h1 >> 1; // Java implementation does logical shift >>> to make values positive
488
+ internal_update(hash);
489
+ }
490
+
491
+ template<typename A>
492
+ compact_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::compact(bool ordered) const {
493
+ return compact_theta_sketch_alloc<A>(*this, ordered);
494
+ }
495
+
496
+ template<typename A>
497
+ void update_theta_sketch_alloc<A>::internal_update(uint64_t hash) {
498
+ this->is_empty_ = false;
499
+ if (hash >= this->theta_ || hash == 0) return; // hash == 0 is reserved to mark empty slots in the table
500
+ if (hash_search_or_insert(hash, keys_.data(), lg_cur_size_)) {
501
+ num_keys_++;
502
+ if (num_keys_ > capacity_) {
503
+ if (lg_cur_size_ <= lg_nom_size_) {
504
+ resize();
505
+ } else {
506
+ rebuild();
507
+ }
508
+ }
509
+ }
510
+ }
511
+
512
+ template<typename A>
513
+ void update_theta_sketch_alloc<A>::trim() {
514
+ if (num_keys_ > static_cast<uint32_t>(1 << lg_nom_size_)) rebuild();
515
+ }
516
+
517
+ template<typename A>
518
+ void update_theta_sketch_alloc<A>::resize() {
519
+ const uint8_t lg_tgt_size = lg_nom_size_ + 1;
520
+ const uint8_t factor = std::max(1, std::min(static_cast<int>(rf_), lg_tgt_size - lg_cur_size_));
521
+ const uint8_t lg_new_size = lg_cur_size_ + factor;
522
+ const uint32_t new_size = 1 << lg_new_size;
523
+ vector_u64<A> new_keys(new_size, 0);
524
+ for (uint32_t i = 0; i < keys_.size(); i++) {
525
+ if (keys_[i] != 0) {
526
+ hash_search_or_insert(keys_[i], new_keys.data(), lg_new_size); // TODO hash_insert
527
+ }
528
+ }
529
+ keys_ = std::move(new_keys);
530
+ lg_cur_size_ += factor;
531
+ capacity_ = get_capacity(lg_cur_size_, lg_nom_size_);
532
+ }
533
+
534
+ template<typename A>
535
+ void update_theta_sketch_alloc<A>::rebuild() {
536
+ const uint32_t pivot = (1 << lg_nom_size_) + keys_.size() - num_keys_;
537
+ std::nth_element(keys_.begin(), keys_.begin() + pivot, keys_.end());
538
+ this->theta_ = keys_[pivot];
539
+ vector_u64<A> new_keys(keys_.size(), 0);
540
+ num_keys_ = 0;
541
+ for (uint32_t i = 0; i < keys_.size(); i++) {
542
+ if (keys_[i] != 0 && keys_[i] < this->theta_) {
543
+ hash_search_or_insert(keys_[i], new_keys.data(), lg_cur_size_); // TODO hash_insert
544
+ num_keys_++;
545
+ }
546
+ }
547
+ keys_ = std::move(new_keys);
548
+ }
549
+
550
+ template<typename A>
551
+ uint32_t update_theta_sketch_alloc<A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
552
+ const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
553
+ return std::floor(fraction * (1 << lg_cur_size));
554
+ }
555
+
556
+ template<typename A>
557
+ uint32_t update_theta_sketch_alloc<A>::get_stride(uint64_t hash, uint8_t lg_size) {
558
+ // odd and independent of index assuming lg_size lowest bits of the hash were used for the index
559
+ return (2 * static_cast<uint32_t>((hash >> lg_size) & STRIDE_MASK)) + 1;
560
+ }
561
+
562
+ template<typename A>
563
+ bool update_theta_sketch_alloc<A>::hash_search_or_insert(uint64_t hash, uint64_t* table, uint8_t lg_size) {
564
+ const uint32_t mask = (1 << lg_size) - 1;
565
+ const uint32_t stride = get_stride(hash, lg_size);
566
+ uint32_t cur_probe = static_cast<uint32_t>(hash) & mask;
567
+
568
+ // search for duplicate or zero
569
+ const uint32_t loop_index = cur_probe;
570
+ do {
571
+ const uint64_t value = table[cur_probe];
572
+ if (value == 0) {
573
+ table[cur_probe] = hash; // insert value
574
+ return true;
575
+ } else if (value == hash) {
576
+ return false; // found a duplicate
577
+ }
578
+ cur_probe = (cur_probe + stride) & mask;
579
+ } while (cur_probe != loop_index);
580
+ throw std::logic_error("key not found and no empty slots!");
581
+ }
582
+
583
+ template<typename A>
584
+ bool update_theta_sketch_alloc<A>::hash_search(uint64_t hash, const uint64_t* table, uint8_t lg_size) {
585
+ const uint32_t mask = (1 << lg_size) - 1;
586
+ const uint32_t stride = update_theta_sketch_alloc<A>::get_stride(hash, lg_size);
587
+ uint32_t cur_probe = static_cast<uint32_t>(hash) & mask;
588
+ const uint32_t loop_index = cur_probe;
589
+ do {
590
+ const uint64_t value = table[cur_probe];
591
+ if (value == 0) {
592
+ return false;
593
+ } else if (value == hash) {
594
+ return true;
595
+ }
596
+ cur_probe = (cur_probe + stride) & mask;
597
+ } while (cur_probe != loop_index);
598
+ throw std::logic_error("key not found and search wrapped");
599
+ }
600
+
601
+ template<typename A>
602
+ typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::begin() const {
603
+ return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0);
604
+ }
605
+
606
+ template<typename A>
607
+ typename theta_sketch_alloc<A>::const_iterator update_theta_sketch_alloc<A>::end() const {
608
+ return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size());
609
+ }
610
+
611
+ // compact sketch
612
+
613
+ template<typename A>
614
+ compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(bool is_empty, uint64_t theta, vector_u64<A>&& keys, uint16_t seed_hash, bool is_ordered):
615
+ theta_sketch_alloc<A>(is_empty, theta),
616
+ keys_(std::move(keys)),
617
+ seed_hash_(seed_hash),
618
+ is_ordered_(is_ordered)
619
+ {}
620
+
621
+ template<typename A>
622
+ compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const theta_sketch_alloc<A>& other, bool ordered):
623
+ theta_sketch_alloc<A>(other),
624
+ keys_(other.get_num_retained()),
625
+ seed_hash_(other.get_seed_hash()),
626
+ is_ordered_(other.is_ordered() || ordered)
627
+ {
628
+ std::copy(other.begin(), other.end(), keys_.begin());
629
+ if (ordered && !other.is_ordered()) std::sort(keys_.begin(), keys_.end());
630
+ }
631
+
632
+ template<typename A>
633
+ uint32_t compact_theta_sketch_alloc<A>::get_num_retained() const {
634
+ return keys_.size();
635
+ }
636
+
637
+ template<typename A>
638
+ uint16_t compact_theta_sketch_alloc<A>::get_seed_hash() const {
639
+ return seed_hash_;
640
+ }
641
+
642
+ template<typename A>
643
+ bool compact_theta_sketch_alloc<A>::is_ordered() const {
644
+ return is_ordered_;
645
+ }
646
+
647
+ template<typename A>
648
+ string<A> compact_theta_sketch_alloc<A>::to_string(bool print_items) const {
649
+ std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
650
+ os << "### Compact Theta sketch summary:" << std::endl;
651
+ os << " num retained keys : " << keys_.size() << std::endl;
652
+ os << " seed hash : " << this->get_seed_hash() << std::endl;
653
+ os << " empty? : " << (this->is_empty() ? "true" : "false") << std::endl;
654
+ os << " ordered? : " << (this->is_ordered() ? "true" : "false") << std::endl;
655
+ os << " estimation mode? : " << (this->is_estimation_mode() ? "true" : "false") << std::endl;
656
+ os << " theta (fraction) : " << this->get_theta() << std::endl;
657
+ os << " theta (raw 64-bit) : " << this->theta_ << std::endl;
658
+ os << " estimate : " << this->get_estimate() << std::endl;
659
+ os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
660
+ os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
661
+ os << "### End sketch summary" << std::endl;
662
+ if (print_items) {
663
+ os << "### Retained keys" << std::endl;
664
+ for (auto key: *this) os << " " << key << std::endl;
665
+ os << "### End retained keys" << std::endl;
666
+ }
667
+ return os.str();
668
+ }
669
+
670
+ template<typename A>
671
+ void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
672
+ const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
673
+ const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
674
+ os.write(reinterpret_cast<const char*>(&preamble_longs), sizeof(preamble_longs));
675
+ const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
676
+ os.write(reinterpret_cast<const char*>(&serial_version), sizeof(serial_version));
677
+ const uint8_t type = SKETCH_TYPE;
678
+ os.write(reinterpret_cast<const char*>(&type), sizeof(type));
679
+ const uint16_t unused16 = 0;
680
+ os.write(reinterpret_cast<const char*>(&unused16), sizeof(unused16));
681
+ const uint8_t flags_byte(
682
+ (1 << theta_sketch_alloc<A>::flags::IS_COMPACT) |
683
+ (1 << theta_sketch_alloc<A>::flags::IS_READ_ONLY) |
684
+ (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0) |
685
+ (this->is_ordered() ? 1 << theta_sketch_alloc<A>::flags::IS_ORDERED : 0)
686
+ );
687
+ os.write(reinterpret_cast<const char*>(&flags_byte), sizeof(flags_byte));
688
+ const uint16_t seed_hash = get_seed_hash();
689
+ os.write((char*)&seed_hash, sizeof(seed_hash));
690
+ if (!this->is_empty()) {
691
+ if (!is_single_item) {
692
+ const uint32_t num_keys = keys_.size();
693
+ os.write((char*)&num_keys, sizeof(num_keys));
694
+ const uint32_t unused32 = 0;
695
+ os.write((char*)&unused32, sizeof(unused32));
696
+ if (this->is_estimation_mode()) {
697
+ os.write((char*)&(this->theta_), sizeof(uint64_t));
698
+ }
699
+ }
700
+ os.write((char*)keys_.data(), sizeof(uint64_t) * keys_.size());
701
+ }
702
+ }
703
+
704
+ template<typename A>
705
+ vector_u8<A> compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
706
+ const bool is_single_item = keys_.size() == 1 && !this->is_estimation_mode();
707
+ const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
708
+ const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + sizeof(uint64_t) * keys_.size();
709
+ vector_u8<A> bytes(size);
710
+ uint8_t* ptr = bytes.data() + header_size_bytes;
711
+
712
+ ptr += copy_to_mem(&preamble_longs, ptr, sizeof(preamble_longs));
713
+ const uint8_t serial_version = theta_sketch_alloc<A>::SERIAL_VERSION;
714
+ ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
715
+ const uint8_t type = SKETCH_TYPE;
716
+ ptr += copy_to_mem(&type, ptr, sizeof(type));
717
+ const uint16_t unused16 = 0;
718
+ ptr += copy_to_mem(&unused16, ptr, sizeof(unused16));
719
+ const uint8_t flags_byte(
720
+ (1 << theta_sketch_alloc<A>::flags::IS_COMPACT) |
721
+ (1 << theta_sketch_alloc<A>::flags::IS_READ_ONLY) |
722
+ (this->is_empty() ? 1 << theta_sketch_alloc<A>::flags::IS_EMPTY : 0) |
723
+ (this->is_ordered() ? 1 << theta_sketch_alloc<A>::flags::IS_ORDERED : 0)
724
+ );
725
+ ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
726
+ const uint16_t seed_hash = get_seed_hash();
727
+ ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
728
+ if (!this->is_empty()) {
729
+ if (!is_single_item) {
730
+ const uint32_t num_keys = keys_.size();
731
+ ptr += copy_to_mem(&num_keys, ptr, sizeof(num_keys));
732
+ const uint32_t unused32 = 0;
733
+ ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
734
+ if (this->is_estimation_mode()) {
735
+ ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
736
+ }
737
+ }
738
+ ptr += copy_to_mem(keys_.data(), ptr, sizeof(uint64_t) * keys_.size());
739
+ }
740
+
741
+ return bytes;
742
+ }
743
+
744
+ template<typename A>
745
+ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
746
+ uint8_t preamble_longs;
747
+ is.read((char*)&preamble_longs, sizeof(preamble_longs));
748
+ uint8_t serial_version;
749
+ is.read((char*)&serial_version, sizeof(serial_version));
750
+ uint8_t type;
751
+ is.read((char*)&type, sizeof(type));
752
+ uint16_t unused16;
753
+ is.read((char*)&unused16, sizeof(unused16));
754
+ uint8_t flags_byte;
755
+ is.read((char*)&flags_byte, sizeof(flags_byte));
756
+ uint16_t seed_hash;
757
+ is.read((char*)&seed_hash, sizeof(seed_hash));
758
+ theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
759
+ theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
760
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
761
+ if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
762
+ return internal_deserialize(is, preamble_longs, flags_byte, seed_hash);
763
+ }
764
+
765
+ template<typename A>
766
+ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserialize(std::istream& is, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash) {
767
+ uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
768
+ uint32_t num_keys = 0;
769
+
770
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
771
+ if (!is_empty) {
772
+ if (preamble_longs == 1) {
773
+ num_keys = 1;
774
+ } else {
775
+ is.read((char*)&num_keys, sizeof(num_keys));
776
+ uint32_t unused32;
777
+ is.read((char*)&unused32, sizeof(unused32));
778
+ if (preamble_longs > 2) {
779
+ is.read((char*)&theta, sizeof(theta));
780
+ }
781
+ }
782
+ }
783
+ vector_u64<A> keys(num_keys);
784
+ if (!is_empty) is.read((char*)keys.data(), sizeof(uint64_t) * keys.size());
785
+
786
+ const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED);
787
+ if (!is.good()) throw std::runtime_error("error reading from std::istream");
788
+ return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered);
789
+ }
790
+
791
+ template<typename A>
792
+ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
793
+ ensure_minimum_memory(size, 8);
794
+ const char* ptr = static_cast<const char*>(bytes);
795
+ uint8_t preamble_longs;
796
+ ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
797
+ uint8_t serial_version;
798
+ ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
799
+ uint8_t type;
800
+ ptr += copy_from_mem(ptr, &type, sizeof(type));
801
+ uint16_t unused16;
802
+ ptr += copy_from_mem(ptr, &unused16, sizeof(unused16));
803
+ uint8_t flags_byte;
804
+ ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
805
+ uint16_t seed_hash;
806
+ ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
807
+ theta_sketch_alloc<A>::check_sketch_type(type, SKETCH_TYPE);
808
+ theta_sketch_alloc<A>::check_serial_version(serial_version, theta_sketch_alloc<A>::SERIAL_VERSION);
809
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
810
+ if (!is_empty) theta_sketch_alloc<A>::check_seed_hash(seed_hash, theta_sketch_alloc<A>::get_seed_hash(seed));
811
+ return internal_deserialize(ptr, size - (ptr - static_cast<const char*>(bytes)), preamble_longs, flags_byte, seed_hash);
812
+ }
813
+
814
+ template<typename A>
815
+ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::internal_deserialize(const void* bytes, size_t size, uint8_t preamble_longs, uint8_t flags_byte, uint16_t seed_hash) {
816
+ const char* ptr = static_cast<const char*>(bytes);
817
+ const char* base = ptr;
818
+
819
+ uint64_t theta = theta_sketch_alloc<A>::MAX_THETA;
820
+ uint32_t num_keys = 0;
821
+
822
+ const bool is_empty = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_EMPTY);
823
+ if (!is_empty) {
824
+ if (preamble_longs == 1) {
825
+ num_keys = 1;
826
+ } else {
827
+ ensure_minimum_memory(size, 8); // read the first prelong before this method
828
+ ptr += copy_from_mem(ptr, &num_keys, sizeof(num_keys));
829
+ uint32_t unused32;
830
+ ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
831
+ if (preamble_longs > 2) {
832
+ ensure_minimum_memory(size, (preamble_longs - 1) << 3);
833
+ ptr += copy_from_mem(ptr, &theta, sizeof(theta));
834
+ }
835
+ }
836
+ }
837
+ const size_t keys_size_bytes = sizeof(uint64_t) * num_keys;
838
+ check_memory_size(ptr - base + keys_size_bytes, size);
839
+ vector_u64<A> keys(num_keys);
840
+ if (!is_empty) ptr += copy_from_mem(ptr, keys.data(), keys_size_bytes);
841
+
842
+ const bool is_ordered = flags_byte & (1 << theta_sketch_alloc<A>::flags::IS_ORDERED);
843
+ return compact_theta_sketch_alloc<A>(is_empty, theta, std::move(keys), seed_hash, is_ordered);
844
+ }
845
+
846
+ template<typename A>
847
+ typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::begin() const {
848
+ return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), 0);
849
+ }
850
+
851
+ template<typename A>
852
+ typename theta_sketch_alloc<A>::const_iterator compact_theta_sketch_alloc<A>::end() const {
853
+ return typename theta_sketch_alloc<A>::const_iterator(keys_.data(), keys_.size(), keys_.size());
854
+ }
855
+
856
+ // builder
857
+
858
+ template<typename A>
859
+ update_theta_sketch_alloc<A>::builder::builder():
860
+ lg_k_(DEFAULT_LG_K), rf_(DEFAULT_RESIZE_FACTOR), p_(1), seed_(DEFAULT_SEED) {}
861
+
862
+ template<typename A>
863
+ typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_lg_k(uint8_t lg_k) {
864
+ if (lg_k < MIN_LG_K) {
865
+ throw std::invalid_argument("lg_k must not be less than " + std::to_string(MIN_LG_K) + ": " + std::to_string(lg_k));
866
+ }
867
+ lg_k_ = lg_k;
868
+ return *this;
869
+ }
870
+
871
+ template<typename A>
872
+ typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_resize_factor(resize_factor rf) {
873
+ rf_ = rf;
874
+ return *this;
875
+ }
876
+
877
+ template<typename A>
878
+ typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_p(float p) {
879
+ p_ = p;
880
+ return *this;
881
+ }
882
+
883
+ template<typename A>
884
+ typename update_theta_sketch_alloc<A>::builder& update_theta_sketch_alloc<A>::builder::set_seed(uint64_t seed) {
885
+ seed_ = seed;
886
+ return *this;
887
+ }
888
+
889
+ template<typename A>
890
+ uint8_t update_theta_sketch_alloc<A>::builder::starting_sub_multiple(uint8_t lg_tgt, uint8_t lg_min, uint8_t lg_rf) {
891
+ return (lg_tgt <= lg_min) ? lg_min : (lg_rf == 0) ? lg_tgt : ((lg_tgt - lg_min) % lg_rf) + lg_min;
892
+ }
893
+
894
+ template<typename A>
895
+ update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() const {
896
+ return update_theta_sketch_alloc<A>(starting_sub_multiple(lg_k_ + 1, MIN_LG_K, static_cast<uint8_t>(rf_)), lg_k_, rf_, p_, seed_);
897
+ }
898
+
899
+ // iterator
900
+
901
+ template<typename A>
902
+ theta_sketch_alloc<A>::const_iterator::const_iterator(const uint64_t* keys, uint32_t size, uint32_t index):
903
+ keys_(keys), size_(size), index_(index) {
904
+ while (index_ < size_ && keys_[index_] == 0) ++index_;
905
+ }
906
+
907
+ template<typename A>
908
+ typename theta_sketch_alloc<A>::const_iterator& theta_sketch_alloc<A>::const_iterator::operator++() {
909
+ do {
910
+ ++index_;
911
+ } while (index_ < size_ && keys_[index_] == 0);
912
+ return *this;
913
+ }
914
+
915
+ template<typename A>
916
+ typename theta_sketch_alloc<A>::const_iterator theta_sketch_alloc<A>::const_iterator::operator++(int) {
917
+ const_iterator tmp(*this);
918
+ operator++();
919
+ return tmp;
920
+ }
921
+
922
+ template<typename A>
923
+ bool theta_sketch_alloc<A>::const_iterator::operator==(const const_iterator& other) const {
924
+ return index_ == other.index_;
925
+ }
926
+
927
+ template<typename A>
928
+ bool theta_sketch_alloc<A>::const_iterator::operator!=(const const_iterator& other) const {
929
+ return index_ != other.index_;
930
+ }
931
+
932
+ template<typename A>
933
+ uint64_t theta_sketch_alloc<A>::const_iterator::operator*() const {
934
+ return keys_[index_];
935
+ }
936
+
937
+ } /* namespace datasketches */
938
+
939
+ #endif