datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247):
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,587 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <sstream>
21
+
22
+ #include "binomial_bounds.hpp"
23
+ #include "theta_helpers.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ template<typename S, typename A>
28
+ bool tuple_sketch<S, A>::is_estimation_mode() const {
29
+ return get_theta64() < theta_constants::MAX_THETA && !is_empty();
30
+ }
31
+
32
+ template<typename S, typename A>
33
+ double tuple_sketch<S, A>::get_theta() const {
34
+ return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
35
+ }
36
+
37
+ template<typename S, typename A>
38
+ double tuple_sketch<S, A>::get_estimate() const {
39
+ return get_num_retained() / get_theta();
40
+ }
41
+
42
+ template<typename S, typename A>
43
+ double tuple_sketch<S, A>::get_lower_bound(uint8_t num_std_devs) const {
44
+ if (!is_estimation_mode()) return get_num_retained();
45
+ return binomial_bounds::get_lower_bound(get_num_retained(), get_theta(), num_std_devs);
46
+ }
47
+
48
+ template<typename S, typename A>
49
+ double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs) const {
50
+ if (!is_estimation_mode()) return get_num_retained();
51
+ return binomial_bounds::get_upper_bound(get_num_retained(), get_theta(), num_std_devs);
52
+ }
53
+
54
+ template<typename S, typename A>
55
+ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
56
+ std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
57
+ os << "### Tuple sketch summary:" << std::endl;
58
+ os << " num retained entries : " << get_num_retained() << std::endl;
59
+ os << " seed hash : " << get_seed_hash() << std::endl;
60
+ os << " empty? : " << (is_empty() ? "true" : "false") << std::endl;
61
+ os << " ordered? : " << (is_ordered() ? "true" : "false") << std::endl;
62
+ os << " estimation mode? : " << (is_estimation_mode() ? "true" : "false") << std::endl;
63
+ os << " theta (fraction) : " << get_theta() << std::endl;
64
+ os << " theta (raw 64-bit) : " << get_theta64() << std::endl;
65
+ os << " estimate : " << this->get_estimate() << std::endl;
66
+ os << " lower bound 95% conf : " << this->get_lower_bound(2) << std::endl;
67
+ os << " upper bound 95% conf : " << this->get_upper_bound(2) << std::endl;
68
+ print_specifics(os);
69
+ os << "### End sketch summary" << std::endl;
70
+ if (detail) {
71
+ os << "### Retained entries" << std::endl;
72
+ for (const auto& it: *this) {
73
+ os << it.first << ": " << it.second << std::endl;
74
+ }
75
+ os << "### End retained entries" << std::endl;
76
+ }
77
+ return os.str();
78
+ }
79
+
80
+ // update sketch
81
+
82
+ template<typename S, typename U, typename P, typename A>
83
+ update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
84
+ policy_(policy),
85
+ map_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
86
+ {}
87
+
88
+ template<typename S, typename U, typename P, typename A>
89
+ A update_tuple_sketch<S, U, P, A>::get_allocator() const {
90
+ return map_.allocator_;
91
+ }
92
+
93
+ template<typename S, typename U, typename P, typename A>
94
+ bool update_tuple_sketch<S, U, P, A>::is_empty() const {
95
+ return map_.is_empty_;
96
+ }
97
+
98
+ template<typename S, typename U, typename P, typename A>
99
+ bool update_tuple_sketch<S, U, P, A>::is_ordered() const {
100
+ return false;
101
+ }
102
+
103
+ template<typename S, typename U, typename P, typename A>
104
+ uint64_t update_tuple_sketch<S, U, P, A>::get_theta64() const {
105
+ return map_.theta_;
106
+ }
107
+
108
+ template<typename S, typename U, typename P, typename A>
109
+ uint32_t update_tuple_sketch<S, U, P, A>::get_num_retained() const {
110
+ return map_.num_entries_;
111
+ }
112
+
113
+ template<typename S, typename U, typename P, typename A>
114
+ uint16_t update_tuple_sketch<S, U, P, A>::get_seed_hash() const {
115
+ return compute_seed_hash(map_.seed_);
116
+ }
117
+
118
+ template<typename S, typename U, typename P, typename A>
119
+ uint8_t update_tuple_sketch<S, U, P, A>::get_lg_k() const {
120
+ return map_.lg_nom_size_;
121
+ }
122
+
123
+ template<typename S, typename U, typename P, typename A>
124
+ auto update_tuple_sketch<S, U, P, A>::get_rf() const -> resize_factor {
125
+ return map_.rf_;
126
+ }
127
+
128
+ template<typename S, typename U, typename P, typename A>
129
+ template<typename UU>
130
+ void update_tuple_sketch<S, U, P, A>::update(uint64_t key, UU&& value) {
131
+ update(&key, sizeof(key), std::forward<UU>(value));
132
+ }
133
+
134
+ template<typename S, typename U, typename P, typename A>
135
+ template<typename UU>
136
+ void update_tuple_sketch<S, U, P, A>::update(int64_t key, UU&& value) {
137
+ update(&key, sizeof(key), std::forward<UU>(value));
138
+ }
139
+
140
+ template<typename S, typename U, typename P, typename A>
141
+ template<typename UU>
142
+ void update_tuple_sketch<S, U, P, A>::update(uint32_t key, UU&& value) {
143
+ update(static_cast<int32_t>(key), std::forward<UU>(value));
144
+ }
145
+
146
+ template<typename S, typename U, typename P, typename A>
147
+ template<typename UU>
148
+ void update_tuple_sketch<S, U, P, A>::update(int32_t key, UU&& value) {
149
+ update(static_cast<int64_t>(key), std::forward<UU>(value));
150
+ }
151
+
152
+ template<typename S, typename U, typename P, typename A>
153
+ template<typename UU>
154
+ void update_tuple_sketch<S, U, P, A>::update(uint16_t key, UU&& value) {
155
+ update(static_cast<int16_t>(key), std::forward<UU>(value));
156
+ }
157
+
158
+ template<typename S, typename U, typename P, typename A>
159
+ template<typename UU>
160
+ void update_tuple_sketch<S, U, P, A>::update(int16_t key, UU&& value) {
161
+ update(static_cast<int64_t>(key), std::forward<UU>(value));
162
+ }
163
+
164
+ template<typename S, typename U, typename P, typename A>
165
+ template<typename UU>
166
+ void update_tuple_sketch<S, U, P, A>::update(uint8_t key, UU&& value) {
167
+ update(static_cast<int8_t>(key), std::forward<UU>(value));
168
+ }
169
+
170
+ template<typename S, typename U, typename P, typename A>
171
+ template<typename UU>
172
+ void update_tuple_sketch<S, U, P, A>::update(int8_t key, UU&& value) {
173
+ update(static_cast<int64_t>(key), std::forward<UU>(value));
174
+ }
175
+
176
+ template<typename S, typename U, typename P, typename A>
177
+ template<typename UU>
178
+ void update_tuple_sketch<S, U, P, A>::update(const std::string& key, UU&& value) {
179
+ if (key.empty()) return;
180
+ update(key.c_str(), key.length(), std::forward<UU>(value));
181
+ }
182
+
183
+ template<typename S, typename U, typename P, typename A>
184
+ template<typename UU>
185
+ void update_tuple_sketch<S, U, P, A>::update(double key, UU&& value) {
186
+ update(canonical_double(key), std::forward<UU>(value));
187
+ }
188
+
189
+ template<typename S, typename U, typename P, typename A>
190
+ template<typename UU>
191
+ void update_tuple_sketch<S, U, P, A>::update(float key, UU&& value) {
192
+ update(static_cast<double>(key), std::forward<UU>(value));
193
+ }
194
+
195
+ template<typename S, typename U, typename P, typename A>
196
+ template<typename UU>
197
+ void update_tuple_sketch<S, U, P, A>::update(const void* key, size_t length, UU&& value) {
198
+ const uint64_t hash = map_.hash_and_screen(key, length);
199
+ if (hash == 0) return;
200
+ auto result = map_.find(hash);
201
+ if (!result.second) {
202
+ S summary = policy_.create();
203
+ policy_.update(summary, std::forward<UU>(value));
204
+ map_.insert(result.first, Entry(hash, std::move(summary)));
205
+ } else {
206
+ policy_.update((*result.first).second, std::forward<UU>(value));
207
+ }
208
+ }
209
+
210
+ template<typename S, typename U, typename P, typename A>
211
+ void update_tuple_sketch<S, U, P, A>::trim() {
212
+ map_.trim();
213
+ }
214
+
215
+ template<typename S, typename U, typename P, typename A>
216
+ auto update_tuple_sketch<S, U, P, A>::begin() -> iterator {
217
+ return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
218
+ }
219
+
220
+ template<typename S, typename U, typename P, typename A>
221
+ auto update_tuple_sketch<S, U, P, A>::end() -> iterator {
222
+ return iterator(nullptr, 0, 1 << map_.lg_cur_size_);
223
+ }
224
+
225
+ template<typename S, typename U, typename P, typename A>
226
+ auto update_tuple_sketch<S, U, P, A>::begin() const -> const_iterator {
227
+ return const_iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
228
+ }
229
+
230
+ template<typename S, typename U, typename P, typename A>
231
+ auto update_tuple_sketch<S, U, P, A>::end() const -> const_iterator {
232
+ return const_iterator(nullptr, 0, 1 << map_.lg_cur_size_);
233
+ }
234
+
235
+ template<typename S, typename U, typename P, typename A>
236
+ compact_tuple_sketch<S, A> update_tuple_sketch<S, U, P, A>::compact(bool ordered) const {
237
+ return compact_tuple_sketch<S, A>(*this, ordered);
238
+ }
239
+
240
+ template<typename S, typename U, typename P, typename A>
241
+ void update_tuple_sketch<S, U, P, A>::print_specifics(std::basic_ostream<char>& os) const {
242
+ os << " lg nominal size : " << (int) map_.lg_nom_size_ << std::endl;
243
+ os << " lg current size : " << (int) map_.lg_cur_size_ << std::endl;
244
+ os << " resize factor : " << (1 << map_.rf_) << std::endl;
245
+ }
246
+
247
+ // compact sketch
248
+
249
+ template<typename S, typename A>
250
+ compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
251
+ std::vector<Entry, AllocEntry>&& entries):
252
+ is_empty_(is_empty),
253
+ is_ordered_(is_ordered),
254
+ seed_hash_(seed_hash),
255
+ theta_(theta),
256
+ entries_(std::move(entries))
257
+ {}
258
+
259
+ template<typename S, typename A>
260
+ compact_tuple_sketch<S, A>::compact_tuple_sketch(const Base& other, bool ordered):
261
+ is_empty_(other.is_empty()),
262
+ is_ordered_(other.is_ordered() || ordered),
263
+ seed_hash_(other.get_seed_hash()),
264
+ theta_(other.get_theta64()),
265
+ entries_(other.get_allocator())
266
+ {
267
+ entries_.reserve(other.get_num_retained());
268
+ std::copy(other.begin(), other.end(), std::back_inserter(entries_));
269
+ if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end(), comparator());
270
+ }
271
+
272
+ template<typename S, typename A>
273
+ compact_tuple_sketch<S, A>::compact_tuple_sketch(compact_tuple_sketch&& other) noexcept:
274
+ is_empty_(other.is_empty()),
275
+ is_ordered_(other.is_ordered()),
276
+ seed_hash_(other.get_seed_hash()),
277
+ theta_(other.get_theta64()),
278
+ entries_(std::move(other.entries_))
279
+ {}
280
+
281
+ template<typename S, typename A>
282
+ compact_tuple_sketch<S, A>::compact_tuple_sketch(const theta_sketch_experimental<AllocU64>& other, const S& summary, bool ordered):
283
+ is_empty_(other.is_empty()),
284
+ is_ordered_(other.is_ordered() || ordered),
285
+ seed_hash_(other.get_seed_hash()),
286
+ theta_(other.get_theta64()),
287
+ entries_(other.get_allocator())
288
+ {
289
+ entries_.reserve(other.get_num_retained());
290
+ for (uint64_t hash: other) {
291
+ entries_.push_back(Entry(hash, summary));
292
+ }
293
+ if (ordered && !other.is_ordered()) std::sort(entries_.begin(), entries_.end(), comparator());
294
+ }
295
+
296
+ template<typename S, typename A>
297
+ A compact_tuple_sketch<S, A>::get_allocator() const {
298
+ return entries_.get_allocator();
299
+ }
300
+
301
+ template<typename S, typename A>
302
+ bool compact_tuple_sketch<S, A>::is_empty() const {
303
+ return is_empty_;
304
+ }
305
+
306
+ template<typename S, typename A>
307
+ bool compact_tuple_sketch<S, A>::is_ordered() const {
308
+ return is_ordered_;
309
+ }
310
+
311
+ template<typename S, typename A>
312
+ uint64_t compact_tuple_sketch<S, A>::get_theta64() const {
313
+ return theta_;
314
+ }
315
+
316
+ template<typename S, typename A>
317
+ uint32_t compact_tuple_sketch<S, A>::get_num_retained() const {
318
+ return entries_.size();
319
+ }
320
+
321
+ template<typename S, typename A>
322
+ uint16_t compact_tuple_sketch<S, A>::get_seed_hash() const {
323
+ return seed_hash_;
324
+ }
325
+
326
+ // implementation for fixed-size arithmetic types (integral and floating point)
327
+ template<typename S, typename A>
328
+ template<typename SD, typename SS, typename std::enable_if<std::is_arithmetic<SS>::value, int>::type>
329
+ size_t compact_tuple_sketch<S, A>::get_serialized_size_summaries_bytes(const SD& sd) const {
330
+ unused(sd);
331
+ return entries_.size() * sizeof(SS);
332
+ }
333
+
334
+ // implementation for all other types (non-arithmetic)
335
+ template<typename S, typename A>
336
+ template<typename SD, typename SS, typename std::enable_if<!std::is_arithmetic<SS>::value, int>::type>
337
+ size_t compact_tuple_sketch<S, A>::get_serialized_size_summaries_bytes(const SD& sd) const {
338
+ size_t size = 0;
339
+ for (const auto& it: entries_) {
340
+ size += sd.size_of_item(it.second);
341
+ }
342
+ return size;
343
+ }
344
+
345
+ template<typename S, typename A>
346
+ template<typename SerDe>
347
+ void compact_tuple_sketch<S, A>::serialize(std::ostream& os, const SerDe& sd) const {
348
+ const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
349
+ const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
350
+ os.write(reinterpret_cast<const char*>(&preamble_longs), sizeof(preamble_longs));
351
+ const uint8_t serial_version = SERIAL_VERSION;
352
+ os.write(reinterpret_cast<const char*>(&serial_version), sizeof(serial_version));
353
+ const uint8_t family = SKETCH_FAMILY;
354
+ os.write(reinterpret_cast<const char*>(&family), sizeof(family));
355
+ const uint8_t type = SKETCH_TYPE;
356
+ os.write(reinterpret_cast<const char*>(&type), sizeof(type));
357
+ const uint8_t unused8 = 0;
358
+ os.write(reinterpret_cast<const char*>(&unused8), sizeof(unused8));
359
+ const uint8_t flags_byte(
360
+ (1 << flags::IS_COMPACT) |
361
+ (1 << flags::IS_READ_ONLY) |
362
+ (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
363
+ (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
364
+ );
365
+ os.write(reinterpret_cast<const char*>(&flags_byte), sizeof(flags_byte));
366
+ const uint16_t seed_hash = get_seed_hash();
367
+ os.write(reinterpret_cast<const char*>(&seed_hash), sizeof(seed_hash));
368
+ if (!this->is_empty()) {
369
+ if (!is_single_item) {
370
+ const uint32_t num_entries = entries_.size();
371
+ os.write(reinterpret_cast<const char*>(&num_entries), sizeof(num_entries));
372
+ const uint32_t unused32 = 0;
373
+ os.write(reinterpret_cast<const char*>(&unused32), sizeof(unused32));
374
+ if (this->is_estimation_mode()) {
375
+ os.write(reinterpret_cast<const char*>(&(this->theta_)), sizeof(uint64_t));
376
+ }
377
+ }
378
+ for (const auto& it: entries_) {
379
+ os.write(reinterpret_cast<const char*>(&it.first), sizeof(uint64_t));
380
+ sd.serialize(os, &it.second, 1);
381
+ }
382
+ }
383
+ }
384
+
385
+ template<typename S, typename A>
386
+ template<typename SerDe>
387
+ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
388
+ const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
389
+ const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
390
+ const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs
391
+ + sizeof(uint64_t) * entries_.size() + get_serialized_size_summaries_bytes(sd);
392
+ vector_bytes bytes(size, 0, entries_.get_allocator());
393
+ uint8_t* ptr = bytes.data() + header_size_bytes;
394
+ const uint8_t* end_ptr = ptr + size;
395
+
396
+ ptr += copy_to_mem(&preamble_longs, ptr, sizeof(preamble_longs));
397
+ const uint8_t serial_version = SERIAL_VERSION;
398
+ ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
399
+ const uint8_t family = SKETCH_FAMILY;
400
+ ptr += copy_to_mem(&family, ptr, sizeof(family));
401
+ const uint8_t type = SKETCH_TYPE;
402
+ ptr += copy_to_mem(&type, ptr, sizeof(type));
403
+ const uint8_t unused8 = 0;
404
+ ptr += copy_to_mem(&unused8, ptr, sizeof(unused8));
405
+ const uint8_t flags_byte(
406
+ (1 << flags::IS_COMPACT) |
407
+ (1 << flags::IS_READ_ONLY) |
408
+ (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
409
+ (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
410
+ );
411
+ ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
412
+ const uint16_t seed_hash = get_seed_hash();
413
+ ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
414
+ if (!this->is_empty()) {
415
+ if (!is_single_item) {
416
+ const uint32_t num_entries = entries_.size();
417
+ ptr += copy_to_mem(&num_entries, ptr, sizeof(num_entries));
418
+ const uint32_t unused32 = 0;
419
+ ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
420
+ if (this->is_estimation_mode()) {
421
+ ptr += copy_to_mem(&theta_, ptr, sizeof(uint64_t));
422
+ }
423
+ }
424
+ for (const auto& it: entries_) {
425
+ ptr += copy_to_mem(&it.first, ptr, sizeof(uint64_t));
426
+ ptr += sd.serialize(ptr, end_ptr - ptr, &it.second, 1);
427
+ }
428
+ }
429
+ return bytes;
430
+ }
431
+
432
+ template<typename S, typename A>
433
+ template<typename SerDe>
434
+ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd, const A& allocator) {
435
+ uint8_t preamble_longs;
436
+ is.read(reinterpret_cast<char*>(&preamble_longs), sizeof(preamble_longs));
437
+ uint8_t serial_version;
438
+ is.read(reinterpret_cast<char*>(&serial_version), sizeof(serial_version));
439
+ uint8_t family;
440
+ is.read(reinterpret_cast<char*>(&family), sizeof(family));
441
+ uint8_t type;
442
+ is.read(reinterpret_cast<char*>(&type), sizeof(type));
443
+ uint8_t unused8;
444
+ is.read(reinterpret_cast<char*>(&unused8), sizeof(unused8));
445
+ uint8_t flags_byte;
446
+ is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
447
+ uint16_t seed_hash;
448
+ is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
449
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
450
+ checker<true>::check_sketch_family(family, SKETCH_FAMILY);
451
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
452
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
453
+ if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
454
+
455
+ uint64_t theta = theta_constants::MAX_THETA;
456
+ uint32_t num_entries = 0;
457
+ if (!is_empty) {
458
+ if (preamble_longs == 1) {
459
+ num_entries = 1;
460
+ } else {
461
+ is.read(reinterpret_cast<char*>(&num_entries), sizeof(num_entries));
462
+ uint32_t unused32;
463
+ is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
464
+ if (preamble_longs > 2) {
465
+ is.read(reinterpret_cast<char*>(&theta), sizeof(theta));
466
+ }
467
+ }
468
+ }
469
+ A alloc(allocator);
470
+ std::vector<Entry, AllocEntry> entries(alloc);
471
+ if (!is_empty) {
472
+ entries.reserve(num_entries);
473
+ std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false));
474
+ for (size_t i = 0; i < num_entries; ++i) {
475
+ uint64_t key;
476
+ is.read(reinterpret_cast<char*>(&key), sizeof(uint64_t));
477
+ sd.deserialize(is, summary.get(), 1);
478
+ entries.push_back(Entry(key, std::move(*summary)));
479
+ (*summary).~S();
480
+ }
481
+ }
482
+ if (!is.good()) throw std::runtime_error("error reading from std::istream");
483
+ const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
484
+ return compact_tuple_sketch(is_empty, is_ordered, seed_hash, theta, std::move(entries));
485
+ }
486
+
487
+ template<typename S, typename A>
488
+ template<typename SerDe>
489
+ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* bytes, size_t size, uint64_t seed, const SerDe& sd, const A& allocator) {
490
+ ensure_minimum_memory(size, 8);
491
+ const char* ptr = static_cast<const char*>(bytes);
492
+ const char* base = ptr;
493
+ uint8_t preamble_longs;
494
+ ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
495
+ uint8_t serial_version;
496
+ ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
497
+ uint8_t family;
498
+ ptr += copy_from_mem(ptr, &family, sizeof(family));
499
+ uint8_t type;
500
+ ptr += copy_from_mem(ptr, &type, sizeof(type));
501
+ uint8_t unused8;
502
+ ptr += copy_from_mem(ptr, &unused8, sizeof(unused8));
503
+ uint8_t flags_byte;
504
+ ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
505
+ uint16_t seed_hash;
506
+ ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
507
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
508
+ checker<true>::check_sketch_family(family, SKETCH_FAMILY);
509
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
510
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
511
+ if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
512
+
513
+ uint64_t theta = theta_constants::MAX_THETA;
514
+ uint32_t num_entries = 0;
515
+
516
+ if (!is_empty) {
517
+ if (preamble_longs == 1) {
518
+ num_entries = 1;
519
+ } else {
520
+ ensure_minimum_memory(size, 8); // read the first prelong before this method
521
+ ptr += copy_from_mem(ptr, &num_entries, sizeof(num_entries));
522
+ uint32_t unused32;
523
+ ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
524
+ if (preamble_longs > 2) {
525
+ ensure_minimum_memory(size, (preamble_longs - 1) << 3);
526
+ ptr += copy_from_mem(ptr, &theta, sizeof(theta));
527
+ }
528
+ }
529
+ }
530
+ const size_t keys_size_bytes = sizeof(uint64_t) * num_entries;
531
+ ensure_minimum_memory(size, ptr - base + keys_size_bytes);
532
+ A alloc(allocator);
533
+ std::vector<Entry, AllocEntry> entries(alloc);
534
+ if (!is_empty) {
535
+ entries.reserve(num_entries);
536
+ std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false));
537
+ for (size_t i = 0; i < num_entries; ++i) {
538
+ uint64_t key;
539
+ ptr += copy_from_mem(ptr, &key, sizeof(key));
540
+ ptr += sd.deserialize(ptr, base + size - ptr, summary.get(), 1);
541
+ entries.push_back(Entry(key, std::move(*summary)));
542
+ (*summary).~S();
543
+ }
544
+ }
545
+ const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
546
+ return compact_tuple_sketch(is_empty, is_ordered, seed_hash, theta, std::move(entries));
547
+ }
548
+
549
+ template<typename S, typename A>
550
+ auto compact_tuple_sketch<S, A>::begin() -> iterator {
551
+ return iterator(entries_.data(), entries_.size(), 0);
552
+ }
553
+
554
+ template<typename S, typename A>
555
+ auto compact_tuple_sketch<S, A>::end() -> iterator {
556
+ return iterator(nullptr, 0, entries_.size());
557
+ }
558
+
559
+ template<typename S, typename A>
560
+ auto compact_tuple_sketch<S, A>::begin() const -> const_iterator {
561
+ return const_iterator(entries_.data(), entries_.size(), 0);
562
+ }
563
+
564
+ template<typename S, typename A>
565
+ auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
566
+ return const_iterator(nullptr, 0, entries_.size());
567
+ }
568
+
569
+ template<typename S, typename A>
570
+ void compact_tuple_sketch<S, A>::print_specifics(std::basic_ostream<char>&) const {}
571
+
572
+ // builder
573
+
574
+ template<typename D, typename P, typename A>
575
+ tuple_base_builder<D, P, A>::tuple_base_builder(const P& policy, const A& allocator):
576
+ theta_base_builder<D, A>(allocator), policy_(policy) {}
577
+
578
+ template<typename S, typename U, typename P, typename A>
579
+ update_tuple_sketch<S, U, P, A>::builder::builder(const P& policy, const A& allocator):
580
+ tuple_base_builder<builder, P, A>(policy, allocator) {}
581
+
582
+ template<typename S, typename U, typename P, typename A>
583
+ auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
584
+ return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
585
+ }
586
+
587
+ } /* namespace datasketches */