datasketches 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,587 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <sstream>
21
+
22
+ #include "binomial_bounds.hpp"
23
+ #include "theta_helpers.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ template<typename S, typename A>
28
+ bool tuple_sketch<S, A>::is_estimation_mode() const {
29
+ return get_theta64() < theta_constants::MAX_THETA && !is_empty();
30
+ }
31
+
32
+ template<typename S, typename A>
33
+ double tuple_sketch<S, A>::get_theta() const {
34
+ return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
35
+ }
36
+
37
+ template<typename S, typename A>
38
+ double tuple_sketch<S, A>::get_estimate() const {
39
+ return get_num_retained() / get_theta();
40
+ }
41
+
42
+ template<typename S, typename A>
43
+ double tuple_sketch<S, A>::get_lower_bound(uint8_t num_std_devs) const {
44
+ if (!is_estimation_mode()) return get_num_retained();
45
+ return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
46
+ }
47
+
48
+ template<typename S, typename A>
49
+ double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs) const {
50
+ if (!is_estimation_mode()) return get_num_retained();
51
+ return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
52
+ }
53
+
54
+ template<typename S, typename A>
55
+ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
56
+ std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
57
+ os << "### Tuple sketch summary:" << std::endl;
58
+ os << " num retained entries : " << get_num_retained() << std::endl;
59
+ os << " seed hash : " << get_seed_hash() << std::endl;
60
+ os << " empty? : " << (is_empty() ? "true" : "false") << std::endl;
61
+ os << " ordered? : " << (is_ordered() ? "true" : "false") << std::endl;
62
+ os << " estimation mode? : " << (is_estimation_mode() ? "true" : "false") << std::endl;
63
+ os << " theta (fraction) : " << get_theta() << std::endl;
64
+ os << " theta (raw 64-bit) : " << get_theta64() << std::endl;
65
+ os << " estimate : " << this->get_estimate() << std::endl;
66
+ os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
67
+ os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
68
+ print_specifics(os);
69
+ os << "### End sketch summary" << std::endl;
70
+ if (detail) {
71
+ os << "### Retained entries" << std::endl;
72
+ for (const auto& it: *this) {
73
+ os << it.first << ": " << it.second << std::endl;
74
+ }
75
+ os << "### End retained entries" << std::endl;
76
+ }
77
+ return os.str();
78
+ }
79
+
80
+ // update sketch
81
+
82
+ template<typename S, typename U, typename P, typename A>
83
+ update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
84
+ policy_(policy),
85
+ map_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
86
+ {}
87
+
88
+ template<typename S, typename U, typename P, typename A>
89
+ A update_tuple_sketch<S, U, P, A>::get_allocator() const {
90
+ return map_.allocator_;
91
+ }
92
+
93
+ template<typename S, typename U, typename P, typename A>
94
+ bool update_tuple_sketch<S, U, P, A>::is_empty() const {
95
+ return map_.is_empty_;
96
+ }
97
+
98
+ template<typename S, typename U, typename P, typename A>
99
+ bool update_tuple_sketch<S, U, P, A>::is_ordered() const {
100
+ return false;
101
+ }
102
+
103
+ template<typename S, typename U, typename P, typename A>
104
+ uint64_t update_tuple_sketch<S, U, P, A>::get_theta64() const {
105
+ return map_.theta_;
106
+ }
107
+
108
+ template<typename S, typename U, typename P, typename A>
109
+ uint32_t update_tuple_sketch<S, U, P, A>::get_num_retained() const {
110
+ return map_.num_entries_;
111
+ }
112
+
113
+ template<typename S, typename U, typename P, typename A>
114
+ uint16_t update_tuple_sketch<S, U, P, A>::get_seed_hash() const {
115
+ return compute_seed_hash(map_.seed_);
116
+ }
117
+
118
+ template<typename S, typename U, typename P, typename A>
119
+ uint8_t update_tuple_sketch<S, U, P, A>::get_lg_k() const {
120
+ return map_.lg_nom_size_;
121
+ }
122
+
123
+ template<typename S, typename U, typename P, typename A>
124
+ auto update_tuple_sketch<S, U, P, A>::get_rf() const -> resize_factor {
125
+ return map_.rf_;
126
+ }
127
+
128
+ template<typename S, typename U, typename P, typename A>
129
+ template<typename UU>
130
+ void update_tuple_sketch<S, U, P, A>::update(uint64_t key, UU&& value) {
131
+ update(&key, sizeof(key), std::forward<UU>(value));
132
+ }
133
+
134
+ template<typename S, typename U, typename P, typename A>
135
+ template<typename UU>
136
+ void update_tuple_sketch<S, U, P, A>::update(int64_t key, UU&& value) {
137
+ update(&key, sizeof(key), std::forward<UU>(value));
138
+ }
139
+
140
+ template<typename S, typename U, typename P, typename A>
141
+ template<typename UU>
142
+ void update_tuple_sketch<S, U, P, A>::update(uint32_t key, UU&& value) {
143
+ update(static_cast<int32_t>(key), std::forward<UU>(value));
144
+ }
145
+
146
+ template<typename S, typename U, typename P, typename A>
147
+ template<typename UU>
148
+ void update_tuple_sketch<S, U, P, A>::update(int32_t key, UU&& value) {
149
+ update(static_cast<int64_t>(key), std::forward<UU>(value));
150
+ }
151
+
152
+ template<typename S, typename U, typename P, typename A>
153
+ template<typename UU>
154
+ void update_tuple_sketch<S, U, P, A>::update(uint16_t key, UU&& value) {
155
+ update(static_cast<int16_t>(key), std::forward<UU>(value));
156
+ }
157
+
158
+ template<typename S, typename U, typename P, typename A>
159
+ template<typename UU>
160
+ void update_tuple_sketch<S, U, P, A>::update(int16_t key, UU&& value) {
161
+ update(static_cast<int64_t>(key), std::forward<UU>(value));
162
+ }
163
+
164
+ template<typename S, typename U, typename P, typename A>
165
+ template<typename UU>
166
+ void update_tuple_sketch<S, U, P, A>::update(uint8_t key, UU&& value) {
167
+ update(static_cast<int8_t>(key), std::forward<UU>(value));
168
+ }
169
+
170
+ template<typename S, typename U, typename P, typename A>
171
+ template<typename UU>
172
+ void update_tuple_sketch<S, U, P, A>::update(int8_t key, UU&& value) {
173
+ update(static_cast<int64_t>(key), std::forward<UU>(value));
174
+ }
175
+
176
+ template<typename S, typename U, typename P, typename A>
177
+ template<typename UU>
178
+ void update_tuple_sketch<S, U, P, A>::update(const std::string& key, UU&& value) {
179
+ if (key.empty()) return;
180
+ update(key.c_str(), key.length(), std::forward<UU>(value));
181
+ }
182
+
183
+ template<typename S, typename U, typename P, typename A>
184
+ template<typename UU>
185
+ void update_tuple_sketch<S, U, P, A>::update(double key, UU&& value) {
186
+ update(canonical_double(key), std::forward<UU>(value));
187
+ }
188
+
189
+ template<typename S, typename U, typename P, typename A>
190
+ template<typename UU>
191
+ void update_tuple_sketch<S, U, P, A>::update(float key, UU&& value) {
192
+ update(static_cast<double>(key), std::forward<UU>(value));
193
+ }
194
+
195
+ template<typename S, typename U, typename P, typename A>
196
+ template<typename UU>
197
+ void update_tuple_sketch<S, U, P, A>::update(const void* key, size_t length, UU&& value) {
198
+ const uint64_t hash = map_.hash_and_screen(key, length);
199
+ if (hash == 0) return;
200
+ auto result = map_.find(hash);
201
+ if (!result.second) {
202
+ S summary = policy_.create();
203
+ policy_.update(summary, std::forward<UU>(value));
204
+ map_.insert(result.first, Entry(hash, std::move(summary)));
205
+ } else {
206
+ policy_.update((*result.first).second, std::forward<UU>(value));
207
+ }
208
+ }
209
+
210
+ template<typename S, typename U, typename P, typename A>
211
+ void update_tuple_sketch<S, U, P, A>::trim() {
212
+ map_.trim();
213
+ }
214
+
215
+ template<typename S, typename U, typename P, typename A>
216
+ auto update_tuple_sketch<S, U, P, A>::begin() -> iterator {
217
+ return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
218
+ }
219
+
220
+ template<typename S, typename U, typename P, typename A>
221
+ auto update_tuple_sketch<S, U, P, A>::end() -> iterator {
222
+ return iterator(nullptr, 0, 1 << map_.lg_cur_size_);
223
+ }
224
+
225
+ template<typename S, typename U, typename P, typename A>
226
+ auto update_tuple_sketch<S, U, P, A>::begin() const -> const_iterator {
227
+ return const_iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
228
+ }
229
+
230
+ template<typename S, typename U, typename P, typename A>
231
+ auto update_tuple_sketch<S, U, P, A>::end() const -> const_iterator {
232
+ return const_iterator(nullptr, 0, 1 << map_.lg_cur_size_);
233
+ }
234
+
235
+ template<typename S, typename U, typename P, typename A>
236
+ compact_tuple_sketch<S, A> update_tuple_sketch<S, U, P, A>::compact(bool ordered) const {
237
+ return compact_tuple_sketch<S, A>(*this, ordered);
238
+ }
239
+
240
+ template<typename S, typename U, typename P, typename A>
241
+ void update_tuple_sketch<S, U, P, A>::print_specifics(std::basic_ostream<char>& os) const {
242
+ os << " lg nominal size : " << (int) map_.lg_nom_size_ << std::endl;
243
+ os << " lg current size : " << (int) map_.lg_cur_size_ << std::endl;
244
+ os << " resize factor : " << (1 << map_.rf_) << std::endl;
245
+ }
246
+
247
+ // compact sketch
248
+
249
+ template<typename S, typename A>
250
+ compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
251
+ std::vector<Entry, AllocEntry>&& entries):
252
+ is_empty_(is_empty),
253
+ is_ordered_(is_ordered),
254
+ seed_hash_(seed_hash),
255
+ theta_(theta),
256
+ entries_(std::move(entries))
257
+ {}
258
+
259
+ template<typename S, typename A>
260
+ compact_tuple_sketch<S, A>::compact_tuple_sketch(const Base& other, bool ordered):
261
+ is_empty_(other.is_empty()),
262
+ is_ordered_(other.is_ordered() || ordered),
263
+ seed_hash_(other.get_seed_hash()),
264
+ theta_(other.get_theta64()),
265
+ entries_(other.get_allocator())
266
+ {
267
+ entries_.reserve(other.get_num_retained());
268
+ std::copy(other.begin(), other.end(), std::back_inserter(entries_));
269
+ if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end(), comparator());
270
+ }
271
+
272
+ template<typename S, typename A>
273
+ compact_tuple_sketch<S, A>::compact_tuple_sketch(compact_tuple_sketch&& other) noexcept:
274
+ is_empty_(other.is_empty()),
275
+ is_ordered_(other.is_ordered()),
276
+ seed_hash_(other.get_seed_hash()),
277
+ theta_(other.get_theta64()),
278
+ entries_(std::move(other.entries_))
279
+ {}
280
+
281
+ template<typename S, typename A>
282
+ compact_tuple_sketch<S, A>::compact_tuple_sketch(const theta_sketch_experimental<AllocU64>& other, const S& summary, bool ordered):
283
+ is_empty_(other.is_empty()),
284
+ is_ordered_(other.is_ordered() || ordered),
285
+ seed_hash_(other.get_seed_hash()),
286
+ theta_(other.get_theta64()),
287
+ entries_(other.get_allocator())
288
+ {
289
+ entries_.reserve(other.get_num_retained());
290
+ for (uint64_t hash: other) {
291
+ entries_.push_back(Entry(hash, summary));
292
+ }
293
+ if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end(), comparator());
294
+ }
295
+
296
+ template<typename S, typename A>
297
+ A compact_tuple_sketch<S, A>::get_allocator() const {
298
+ return entries_.get_allocator();
299
+ }
300
+
301
+ template<typename S, typename A>
302
+ bool compact_tuple_sketch<S, A>::is_empty() const {
303
+ return is_empty_;
304
+ }
305
+
306
+ template<typename S, typename A>
307
+ bool compact_tuple_sketch<S, A>::is_ordered() const {
308
+ return is_ordered_;
309
+ }
310
+
311
+ template<typename S, typename A>
312
+ uint64_t compact_tuple_sketch<S, A>::get_theta64() const {
313
+ return theta_;
314
+ }
315
+
316
+ template<typename S, typename A>
317
+ uint32_t compact_tuple_sketch<S, A>::get_num_retained() const {
318
+ return entries_.size();
319
+ }
320
+
321
+ template<typename S, typename A>
322
+ uint16_t compact_tuple_sketch<S, A>::get_seed_hash() const {
323
+ return seed_hash_;
324
+ }
325
+
326
+ // implementation for fixed-size arithmetic types (integral and floating point)
327
+ template<typename S, typename A>
328
+ template<typename SD, typename SS, typename std::enable_if<std::is_arithmetic<SS>::value, int>::type>
329
+ size_t compact_tuple_sketch<S, A>::get_serialized_size_summaries_bytes(const SD& sd) const {
330
+ unused(sd);
331
+ return entries_.size() * sizeof(SS);
332
+ }
333
+
334
+ // implementation for all other types (non-arithmetic)
335
+ template<typename S, typename A>
336
+ template<typename SD, typename SS, typename std::enable_if<!std::is_arithmetic<SS>::value, int>::type>
337
+ size_t compact_tuple_sketch<S, A>::get_serialized_size_summaries_bytes(const SD& sd) const {
338
+ size_t size = 0;
339
+ for (const auto& it: entries_) {
340
+ size += sd.size_of_item(it.second);
341
+ }
342
+ return size;
343
+ }
344
+
345
+ template<typename S, typename A>
346
+ template<typename SerDe>
347
+ void compact_tuple_sketch<S, A>::serialize(std::ostream& os, const SerDe& sd) const {
348
+ const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
349
+ const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
350
+ os.write(reinterpret_cast<const char*>(&preamble_longs), sizeof(preamble_longs));
351
+ const uint8_t serial_version = SERIAL_VERSION;
352
+ os.write(reinterpret_cast<const char*>(&serial_version), sizeof(serial_version));
353
+ const uint8_t family = SKETCH_FAMILY;
354
+ os.write(reinterpret_cast<const char*>(&family), sizeof(family));
355
+ const uint8_t type = SKETCH_TYPE;
356
+ os.write(reinterpret_cast<const char*>(&type), sizeof(type));
357
+ const uint8_t unused8 = 0;
358
+ os.write(reinterpret_cast<const char*>(&unused8), sizeof(unused8));
359
+ const uint8_t flags_byte(
360
+ (1 << flags::IS_COMPACT) |
361
+ (1 << flags::IS_READ_ONLY) |
362
+ (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
363
+ (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
364
+ );
365
+ os.write(reinterpret_cast<const char*>(&flags_byte), sizeof(flags_byte));
366
+ const uint16_t seed_hash = get_seed_hash();
367
+ os.write(reinterpret_cast<const char*>(&seed_hash), sizeof(seed_hash));
368
+ if (!this->is_empty()) {
369
+ if (!is_single_item) {
370
+ const uint32_t num_entries = entries_.size();
371
+ os.write(reinterpret_cast<const char*>(&num_entries), sizeof(num_entries));
372
+ const uint32_t unused32 = 0;
373
+ os.write(reinterpret_cast<const char*>(&unused32), sizeof(unused32));
374
+ if (this->is_estimation_mode()) {
375
+ os.write(reinterpret_cast<const char*>(&(this->theta_)), sizeof(uint64_t));
376
+ }
377
+ }
378
+ for (const auto& it: entries_) {
379
+ os.write(reinterpret_cast<const char*>(&it.first), sizeof(uint64_t));
380
+ sd.serialize(os, &it.second, 1);
381
+ }
382
+ }
383
+ }
384
+
385
+ template<typename S, typename A>
386
+ template<typename SerDe>
387
+ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
388
+ const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
389
+ const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
390
+ const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
391
+ + sizeof(uint64_t) * entries_.size() + get_serialized_size_summaries_bytes(sd);
392
+ vector_bytes bytes(size, 0, entries_.get_allocator());
393
+ uint8_t* ptr = bytes.data() + header_size_bytes;
394
+ const uint8_t* end_ptr = ptr + size;
395
+
396
+ ptr += copy_to_mem(&preamble_longs, ptr, sizeof(preamble_longs));
397
+ const uint8_t serial_version = SERIAL_VERSION;
398
+ ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
399
+ const uint8_t family = SKETCH_FAMILY;
400
+ ptr += copy_to_mem(&family, ptr, sizeof(family));
401
+ const uint8_t type = SKETCH_TYPE;
402
+ ptr += copy_to_mem(&type, ptr, sizeof(type));
403
+ const uint8_t unused8 = 0;
404
+ ptr += copy_to_mem(&unused8, ptr, sizeof(unused8));
405
+ const uint8_t flags_byte(
406
+ (1 << flags::IS_COMPACT) |
407
+ (1 << flags::IS_READ_ONLY) |
408
+ (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
409
+ (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
410
+ );
411
+ ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
412
+ const uint16_t seed_hash = get_seed_hash();
413
+ ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
414
+ if (!this->is_empty()) {
415
+ if (!is_single_item) {
416
+ const uint32_t num_entries = entries_.size();
417
+ ptr += copy_to_mem(&num_entries, ptr, sizeof(num_entries));
418
+ const uint32_t unused32 = 0;
419
+ ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
420
+ if (this->is_estimation_mode()) {
421
+ ptr += copy_to_mem(&theta_, ptr, sizeof(uint64_t));
422
+ }
423
+ }
424
+ for (const auto& it: entries_) {
425
+ ptr += copy_to_mem(&it.first, ptr, sizeof(uint64_t));
426
+ ptr += sd.serialize(ptr, end_ptr - ptr, &it.second, 1);
427
+ }
428
+ }
429
+ return bytes;
430
+ }
431
+
432
+ template<typename S, typename A>
433
+ template<typename SerDe>
434
+ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd, const A& allocator) {
435
+ uint8_t preamble_longs;
436
+ is.read(reinterpret_cast<char*>(&preamble_longs), sizeof(preamble_longs));
437
+ uint8_t serial_version;
438
+ is.read(reinterpret_cast<char*>(&serial_version), sizeof(serial_version));
439
+ uint8_t family;
440
+ is.read(reinterpret_cast<char*>(&family), sizeof(family));
441
+ uint8_t type;
442
+ is.read(reinterpret_cast<char*>(&type), sizeof(type));
443
+ uint8_t unused8;
444
+ is.read(reinterpret_cast<char*>(&unused8), sizeof(unused8));
445
+ uint8_t flags_byte;
446
+ is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
447
+ uint16_t seed_hash;
448
+ is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
449
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
450
+ checker<true>::check_sketch_family(family, SKETCH_FAMILY);
451
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
452
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
453
+ if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
454
+
455
+ uint64_t theta = theta_constants::MAX_THETA;
456
+ uint32_t num_entries = 0;
457
+ if (!is_empty) {
458
+ if (preamble_longs == 1) {
459
+ num_entries = 1;
460
+ } else {
461
+ is.read(reinterpret_cast<char*>(&num_entries), sizeof(num_entries));
462
+ uint32_t unused32;
463
+ is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
464
+ if (preamble_longs > 2) {
465
+ is.read(reinterpret_cast<char*>(&theta), sizeof(theta));
466
+ }
467
+ }
468
+ }
469
+ A alloc(allocator);
470
+ std::vector<Entry, AllocEntry> entries(alloc);
471
+ if (!is_empty) {
472
+ entries.reserve(num_entries);
473
+ std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false));
474
+ for (size_t i = 0; i < num_entries; ++i) {
475
+ uint64_t key;
476
+ is.read(reinterpret_cast<char*>(&key), sizeof(uint64_t));
477
+ sd.deserialize(is, summary.get(), 1);
478
+ entries.push_back(Entry(key, std::move(*summary)));
479
+ (*summary).~S();
480
+ }
481
+ }
482
+ if (!is.good()) throw std::runtime_error("error reading from std::istream");
483
+ const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
484
+ return compact_tuple_sketch(is_empty, is_ordered, seed_hash, theta, std::move(entries));
485
+ }
486
+
487
+ template<typename S, typename A>
488
+ template<typename SerDe>
489
+ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* bytes, size_t size, uint64_t seed, const SerDe& sd, const A& allocator) {
490
+ ensure_minimum_memory(size, 8);
491
+ const char* ptr = static_cast<const char*>(bytes);
492
+ const char* base = ptr;
493
+ uint8_t preamble_longs;
494
+ ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
495
+ uint8_t serial_version;
496
+ ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
497
+ uint8_t family;
498
+ ptr += copy_from_mem(ptr, &family, sizeof(family));
499
+ uint8_t type;
500
+ ptr += copy_from_mem(ptr, &type, sizeof(type));
501
+ uint8_t unused8;
502
+ ptr += copy_from_mem(ptr, &unused8, sizeof(unused8));
503
+ uint8_t flags_byte;
504
+ ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
505
+ uint16_t seed_hash;
506
+ ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
507
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
508
+ checker<true>::check_sketch_family(family, SKETCH_FAMILY);
509
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
510
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
511
+ if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
512
+
513
+ uint64_t theta = theta_constants::MAX_THETA;
514
+ uint32_t num_entries = 0;
515
+
516
+ if (!is_empty) {
517
+ if (preamble_longs == 1) {
518
+ num_entries = 1;
519
+ } else {
520
+ ensure_minimum_memory(size, 8); // read the first prelong before this method
521
+ ptr += copy_from_mem(ptr, &num_entries, sizeof(num_entries));
522
+ uint32_t unused32;
523
+ ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
524
+ if (preamble_longs > 2) {
525
+ ensure_minimum_memory(size, (preamble_longs - 1) << 3);
526
+ ptr += copy_from_mem(ptr, &theta, sizeof(theta));
527
+ }
528
+ }
529
+ }
530
+ const size_t keys_size_bytes = sizeof(uint64_t) * num_entries;
531
+ ensure_minimum_memory(size, ptr - base + keys_size_bytes);
532
+ A alloc(allocator);
533
+ std::vector<Entry, AllocEntry> entries(alloc);
534
+ if (!is_empty) {
535
+ entries.reserve(num_entries);
536
+ std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false));
537
+ for (size_t i = 0; i < num_entries; ++i) {
538
+ uint64_t key;
539
+ ptr += copy_from_mem(ptr, &key, sizeof(key));
540
+ ptr += sd.deserialize(ptr, base + size - ptr, summary.get(), 1);
541
+ entries.push_back(Entry(key, std::move(*summary)));
542
+ (*summary).~S();
543
+ }
544
+ }
545
+ const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
546
+ return compact_tuple_sketch(is_empty, is_ordered, seed_hash, theta, std::move(entries));
547
+ }
548
+
549
+ template<typename S, typename A>
550
+ auto compact_tuple_sketch<S, A>::begin() -> iterator {
551
+ return iterator(entries_.data(), entries_.size(), 0);
552
+ }
553
+
554
+ template<typename S, typename A>
555
+ auto compact_tuple_sketch<S, A>::end() -> iterator {
556
+ return iterator(nullptr, 0, entries_.size());
557
+ }
558
+
559
+ template<typename S, typename A>
560
+ auto compact_tuple_sketch<S, A>::begin() const -> const_iterator {
561
+ return const_iterator(entries_.data(), entries_.size(), 0);
562
+ }
563
+
564
+ template<typename S, typename A>
565
+ auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
566
+ return const_iterator(nullptr, 0, entries_.size());
567
+ }
568
+
569
+ template<typename S, typename A>
570
+ void compact_tuple_sketch<S, A>::print_specifics(std::basic_ostream<char>&) const {}
571
+
572
+ // builder
573
+
574
+ template<typename D, typename P, typename A>
575
+ tuple_base_builder<D, P, A>::tuple_base_builder(const P& policy, const A& allocator):
576
+ theta_base_builder<D, A>(allocator), policy_(policy) {}
577
+
578
+ template<typename S, typename U, typename P, typename A>
579
+ update_tuple_sketch<S, U, P, A>::builder::builder(const P& policy, const A& allocator):
580
+ tuple_base_builder<builder, P, A>(policy, allocator) {}
581
+
582
+ template<typename S, typename U, typename P, typename A>
583
+ auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
584
+ return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
585
+ }
586
+
587
+ } /* namespace datasketches */