datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,40 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _COMPOSITEINTERPOLATIONXTABLE_HPP_
21
+ #define _COMPOSITEINTERPOLATIONXTABLE_HPP_
22
+
23
+ #include <memory>
24
+
25
+ namespace datasketches {
26
+
/**
 * Static accessors for the composite-interpolation X table.
 *
 * Only declarations live here; the definitions are presumably supplied by
 * CompositeInterpolationXTable-internal.hpp, which this header includes
 * right after the class.
 *
 * @tparam A allocator type; it does not appear in any signature declared here
 */
template<typename A = std::allocator<char>>
class CompositeInterpolationXTable {
public:
  // Array accessors: a pointer selected by logK, plus an element count.
  static const double* get_x_arr(int logK);

  static int get_x_arr_length();

  // Stride lookup keyed by logK.
  static int get_y_stride(int logK);
};
35
+
36
+ }
37
+
38
+ #include "CompositeInterpolationXTable-internal.hpp"
39
+
40
+ #endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
@@ -0,0 +1,291 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _COUPONHASHSET_INTERNAL_HPP_
21
+ #define _COUPONHASHSET_INTERNAL_HPP_
22
+
23
+ #include "CouponHashSet.hpp"
24
+
25
+ #include <cstring>
26
+ #include <exception>
27
+
28
+ namespace datasketches {
29
+
// Forward declaration of the probe helper defined at the end of this file.
// It yields the slot index when the coupon is already stored, otherwise the
// bitwise complement of the empty slot where the coupon belongs.
template<typename A>
static int find(const int* array, int lgArrInts, int coupon);
32
+
33
+ template<typename A>
34
+ CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType)
35
+ : CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET)
36
+ {
37
+ if (lgConfigK <= 7) {
38
+ throw std::invalid_argument("CouponHashSet must be initialized with lgConfigK > 7. Found: "
39
+ + std::to_string(lgConfigK));
40
+ }
41
+ }
42
+
43
+ template<typename A>
44
+ CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that)
45
+ : CouponList<A>(that) {}
46
+
47
+ template<typename A>
48
+ CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that, const target_hll_type tgtHllType)
49
+ : CouponList<A>(that, tgtHllType) {}
50
+
51
+ template<typename A>
52
+ CouponHashSet<A>::~CouponHashSet() {}
53
+
54
+ template<typename A>
55
+ std::function<void(HllSketchImpl<A>*)> CouponHashSet<A>::get_deleter() const {
56
+ return [](HllSketchImpl<A>* ptr) {
57
+ CouponHashSet<A>* chs = static_cast<CouponHashSet<A>*>(ptr);
58
+ chs->~CouponHashSet();
59
+ chsAlloc().deallocate(chs, 1);
60
+ };
61
+ }
62
+
63
+ template<typename A>
64
+ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
65
+ if (len < HllUtil<A>::HASH_SET_INT_ARR_START) { // hard-coded
66
+ throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
67
+ }
68
+
69
+ const uint8_t* data = static_cast<const uint8_t*>(bytes);
70
+ if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HASH_SET_PREINTS) {
71
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
72
+ }
73
+ if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
74
+ throw std::invalid_argument("Wrong ser ver in input stream");
75
+ }
76
+ if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
77
+ throw std::invalid_argument("Input stream is not an HLL sketch");
78
+ }
79
+
80
+ const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
81
+ if (mode != SET) {
82
+ throw std::invalid_argument("Calling set construtor with non-set mode data");
83
+ }
84
+
85
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
86
+
87
+ const int lgK = data[HllUtil<A>::LG_K_BYTE];
88
+ if (lgK <= 7) {
89
+ throw std::invalid_argument("Attempt to deserialize invalid CouponHashSet with lgConfigK <= 7. Found: "
90
+ + std::to_string(lgK));
91
+ }
92
+ int lgArrInts = data[HllUtil<A>::LG_ARR_BYTE];
93
+ const bool compactFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
94
+
95
+ int couponCount;
96
+ std::memcpy(&couponCount, data + HllUtil<A>::HASH_SET_COUNT_INT, sizeof(couponCount));
97
+ if (lgArrInts < HllUtil<A>::LG_INIT_SET_SIZE) {
98
+ lgArrInts = HllUtil<A>::computeLgArrInts(SET, couponCount, lgK);
99
+ }
100
+ // Don't set couponCount in sketch here;
101
+ // we'll set later if updatable, and increment with updates if compact
102
+ const int couponsInArray = (compactFlag ? couponCount : (1 << lgArrInts));
103
+ const size_t expectedLength = HllUtil<A>::HASH_SET_INT_ARR_START + (couponsInArray * sizeof(int));
104
+ if (len < expectedLength) {
105
+ throw std::out_of_range("Byte array too short for sketch. Expected " + std::to_string(expectedLength)
106
+ + ", found: " + std::to_string(len));
107
+ }
108
+
109
+ CouponHashSet<A>* sketch = new (chsAlloc().allocate(1)) CouponHashSet<A>(lgK, tgtHllType);
110
+
111
+ if (compactFlag) {
112
+ const uint8_t* curPos = data + HllUtil<A>::HASH_SET_INT_ARR_START;
113
+ int coupon;
114
+ for (int i = 0; i < couponCount; ++i, curPos += sizeof(coupon)) {
115
+ std::memcpy(&coupon, curPos, sizeof(coupon));
116
+ sketch->couponUpdate(coupon);
117
+ }
118
+ } else {
119
+ int* oldArr = sketch->couponIntArr;
120
+ const size_t oldArrLen = 1 << sketch->lgCouponArrInts;
121
+ sketch->lgCouponArrInts = lgArrInts;
122
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
123
+ sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
124
+ sketch->couponCount = couponCount;
125
+ // only need to read valid coupons, unlike in stream case
126
+ std::memcpy(sketch->couponIntArr,
127
+ data + HllUtil<A>::HASH_SET_INT_ARR_START,
128
+ couponCount * sizeof(int));
129
+ intAlloc().deallocate(oldArr, oldArrLen);
130
+ }
131
+
132
+ return sketch;
133
+ }
134
+
135
+ template<typename A>
136
+ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
137
+ uint8_t listHeader[8];
138
+ is.read((char*)listHeader, 8 * sizeof(uint8_t));
139
+
140
+ if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HASH_SET_PREINTS) {
141
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
142
+ }
143
+ if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
144
+ throw std::invalid_argument("Wrong ser ver in input stream");
145
+ }
146
+ if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
147
+ throw std::invalid_argument("Input stream is not an HLL sketch");
148
+ }
149
+
150
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
151
+ if (mode != SET) {
152
+ throw std::invalid_argument("Calling set construtor with non-set mode data");
153
+ }
154
+
155
+ target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
156
+
157
+ const int lgK = listHeader[HllUtil<A>::LG_K_BYTE];
158
+ if (lgK <= 7) {
159
+ throw std::invalid_argument("Attempt to deserialize invalid CouponHashSet with lgConfigK <= 7. Found: "
160
+ + std::to_string(lgK));
161
+ }
162
+ int lgArrInts = listHeader[HllUtil<A>::LG_ARR_BYTE];
163
+ const bool compactFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
164
+
165
+ int couponCount;
166
+ is.read((char*)&couponCount, sizeof(couponCount));
167
+ if (lgArrInts < HllUtil<A>::LG_INIT_SET_SIZE) {
168
+ lgArrInts = HllUtil<A>::computeLgArrInts(SET, couponCount, lgK);
169
+ }
170
+
171
+ CouponHashSet<A>* sketch = new (chsAlloc().allocate(1)) CouponHashSet<A>(lgK, tgtHllType);
172
+ typedef std::unique_ptr<CouponHashSet<A>, std::function<void(HllSketchImpl<A>*)>> coupon_hash_set_ptr;
173
+ coupon_hash_set_ptr ptr(sketch, sketch->get_deleter());
174
+
175
+ // Don't set couponCount here;
176
+ // we'll set later if updatable, and increment with updates if compact
177
+ if (compactFlag) {
178
+ for (int i = 0; i < couponCount; ++i) {
179
+ int coupon;
180
+ is.read((char*)&coupon, sizeof(coupon));
181
+ sketch->couponUpdate(coupon);
182
+ }
183
+ } else {
184
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
185
+ intAlloc().deallocate(sketch->couponIntArr, 1 << sketch->lgCouponArrInts);
186
+ sketch->lgCouponArrInts = lgArrInts;
187
+ sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
188
+ sketch->couponCount = couponCount;
189
+ // for stream processing, read entire list so read pointer ends up set correctly
190
+ is.read((char*)sketch->couponIntArr, (1 << sketch->lgCouponArrInts) * sizeof(int));
191
+ }
192
+
193
+ if (!is.good())
194
+ throw std::runtime_error("error reading from std::istream");
195
+
196
+ return ptr.release();
197
+ }
198
+
199
+ template<typename A>
200
+ CouponHashSet<A>* CouponHashSet<A>::copy() const {
201
+ return new (chsAlloc().allocate(1)) CouponHashSet<A>(*this);
202
+ }
203
+
204
+ template<typename A>
205
+ CouponHashSet<A>* CouponHashSet<A>::copyAs(const target_hll_type tgtHllType) const {
206
+ return new (chsAlloc().allocate(1)) CouponHashSet<A>(*this, tgtHllType);
207
+ }
208
+
209
+ template<typename A>
210
+ HllSketchImpl<A>* CouponHashSet<A>::couponUpdate(int coupon) {
211
+ const int index = find<A>(this->couponIntArr, this->lgCouponArrInts, coupon);
212
+ if (index >= 0) {
213
+ return this; // found duplicate, ignore
214
+ }
215
+ this->couponIntArr[~index] = coupon; // found empty
216
+ ++this->couponCount;
217
+ if (checkGrowOrPromote()) {
218
+ return this->promoteHeapListOrSetToHll(*this);
219
+ }
220
+ return this;
221
+ }
222
+
223
+ template<typename A>
224
+ int CouponHashSet<A>::getMemDataStart() const {
225
+ return HllUtil<A>::HASH_SET_INT_ARR_START;
226
+ }
227
+
228
+ template<typename A>
229
+ int CouponHashSet<A>::getPreInts() const {
230
+ return HllUtil<A>::HASH_SET_PREINTS;
231
+ }
232
+
233
+ template<typename A>
234
+ bool CouponHashSet<A>::checkGrowOrPromote() {
235
+ if ((HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER * (1 << this->lgCouponArrInts))) {
236
+ if (this->lgCouponArrInts == (this->lgConfigK - 3)) { // at max size
237
+ return true; // promote to HLL
238
+ }
239
+ int tgtLgCoupArrSize = this->lgCouponArrInts + 1;
240
+ growHashSet(this->lgCouponArrInts, tgtLgCoupArrSize);
241
+ }
242
+ return false;
243
+ }
244
+
245
+ template<typename A>
246
+ void CouponHashSet<A>::growHashSet(const int srcLgCoupArrSize, const int tgtLgCoupArrSize) {
247
+ const int tgtLen = 1 << tgtLgCoupArrSize;
248
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
249
+ int* tgtCouponIntArr = intAlloc().allocate(tgtLen);
250
+ std::fill(tgtCouponIntArr, tgtCouponIntArr + tgtLen, 0);
251
+
252
+ const int srcLen = 1 << srcLgCoupArrSize;
253
+ for (int i = 0; i < srcLen; ++i) { // scan existing array for non-zero values
254
+ const int fetched = this->couponIntArr[i];
255
+ if (fetched != HllUtil<A>::EMPTY) {
256
+ const int idx = find<A>(tgtCouponIntArr, tgtLgCoupArrSize, fetched); // search TGT array
257
+ if (idx < 0) { // found EMPTY
258
+ tgtCouponIntArr[~idx] = fetched; // insert
259
+ continue;
260
+ }
261
+ throw std::runtime_error("Error: Found duplicate coupon");
262
+ }
263
+ }
264
+
265
+ intAlloc().deallocate(this->couponIntArr, 1 << this->lgCouponArrInts);
266
+ this->couponIntArr = tgtCouponIntArr;
267
+ this->lgCouponArrInts = tgtLgCoupArrSize;
268
+ }
269
+
270
+ template<typename A>
271
+ static int find(const int* array, const int lgArrInts, const int coupon) {
272
+ const int arrMask = (1 << lgArrInts) - 1;
273
+ int probe = coupon & arrMask;
274
+ const int loopIndex = probe;
275
+ do {
276
+ const int couponAtIdx = array[probe];
277
+ if (couponAtIdx == HllUtil<A>::EMPTY) {
278
+ return ~probe; //empty
279
+ }
280
+ else if (coupon == couponAtIdx) {
281
+ return probe; //duplicate
282
+ }
283
+ const int stride = ((coupon & HllUtil<A>::KEY_MASK_26) >> lgArrInts) | 1;
284
+ probe = (probe + stride) & arrMask;
285
+ } while (probe != loopIndex);
286
+ throw std::invalid_argument("Key not found and no empty slots!");
287
+ }
288
+
289
+ }
290
+
291
+ #endif // _COUPONHASHSET_INTERNAL_HPP_
@@ -0,0 +1,59 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _COUPONHASHSET_HPP_
21
+ #define _COUPONHASHSET_HPP_
22
+
23
+ #include "CouponList.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ template<typename A = std::allocator<char>>
28
+ class CouponHashSet : public CouponList<A> {
29
+ public:
30
+ static CouponHashSet* newSet(const void* bytes, size_t len);
31
+ static CouponHashSet* newSet(std::istream& is);
32
+ explicit CouponHashSet(int lgConfigK, target_hll_type tgtHllType);
33
+ explicit CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
34
+ explicit CouponHashSet(const CouponHashSet& that);
35
+
36
+ virtual ~CouponHashSet();
37
+ virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
38
+
39
+ protected:
40
+
41
+ virtual CouponHashSet* copy() const;
42
+ virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const;
43
+
44
+ virtual HllSketchImpl<A>* couponUpdate(int coupon);
45
+
46
+ virtual int getMemDataStart() const;
47
+ virtual int getPreInts() const;
48
+
49
+ friend class HllSketchImplFactory<A>;
50
+
51
+ private:
52
+ typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc;
53
+ bool checkGrowOrPromote();
54
+ void growHashSet(int srcLgCoupArrSize, int tgtLgCoupArrSize);
55
+ };
56
+
57
+ }
58
+
59
+ #endif /* _COUPONHASHSET_HPP_ */
@@ -0,0 +1,417 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _COUPONLIST_INTERNAL_HPP_
21
+ #define _COUPONLIST_INTERNAL_HPP_
22
+
23
+ #include "CouponList.hpp"
24
+ #include "CubicInterpolation.hpp"
25
+ #include "HllUtil.hpp"
26
+
27
+ #include <algorithm>
28
+ #include <cmath>
29
+
30
+ namespace datasketches {
31
+
32
+ template<typename A>
33
+ CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode)
34
+ : HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false) {
35
+ if (mode == hll_mode::LIST) {
36
+ lgCouponArrInts = HllUtil<A>::LG_INIT_LIST_SIZE;
37
+ } else { // mode == SET
38
+ lgCouponArrInts = HllUtil<A>::LG_INIT_SET_SIZE;
39
+ }
40
+ oooFlag = false;
41
+ const int arrayLen = 1 << lgCouponArrInts;
42
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
43
+ couponIntArr = intAlloc().allocate(arrayLen);
44
+ std::fill(couponIntArr, couponIntArr + arrayLen, 0);
45
+ couponCount = 0;
46
+ }
47
+
48
+ template<typename A>
49
+ CouponList<A>::CouponList(const CouponList& that)
50
+ : HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, that.mode, false),
51
+ lgCouponArrInts(that.lgCouponArrInts),
52
+ couponCount(that.couponCount),
53
+ oooFlag(that.oooFlag) {
54
+
55
+ const int numItems = 1 << lgCouponArrInts;
56
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
57
+ couponIntArr = intAlloc().allocate(numItems);
58
+ std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
59
+ }
60
+
61
+ template<typename A>
62
+ CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType)
63
+ : HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
64
+ lgCouponArrInts(that.lgCouponArrInts),
65
+ couponCount(that.couponCount),
66
+ oooFlag(that.oooFlag) {
67
+
68
+ const int numItems = 1 << lgCouponArrInts;
69
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
70
+ couponIntArr = intAlloc().allocate(numItems);
71
+ std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
72
+ }
73
+
74
+ template<typename A>
75
+ CouponList<A>::~CouponList() {
76
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
77
+ intAlloc().deallocate(couponIntArr, 1 << lgCouponArrInts);
78
+ }
79
+
80
+ template<typename A>
81
+ std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const {
82
+ return [](HllSketchImpl<A>* ptr) {
83
+ CouponList<A>* cl = static_cast<CouponList<A>*>(ptr);
84
+ cl->~CouponList();
85
+ clAlloc().deallocate(cl, 1);
86
+ };
87
+ }
88
+
89
+ template<typename A>
90
+ CouponList<A>* CouponList<A>::copy() const {
91
+ return new (clAlloc().allocate(1)) CouponList<A>(*this);
92
+ }
93
+
94
+ template<typename A>
95
+ CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const {
96
+ return new (clAlloc().allocate(1)) CouponList<A>(*this, tgtHllType);
97
+ }
98
+
99
+ template<typename A>
100
+ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
101
+ if (len < HllUtil<A>::LIST_INT_ARR_START) {
102
+ throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
103
+ }
104
+
105
+ const uint8_t* data = static_cast<const uint8_t*>(bytes);
106
+ if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::LIST_PREINTS) {
107
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
108
+ }
109
+ if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
110
+ throw std::invalid_argument("Wrong ser ver in input stream");
111
+ }
112
+ if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
113
+ throw std::invalid_argument("Input stream is not an HLL sketch");
114
+ }
115
+
116
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
117
+ if (mode != LIST) {
118
+ throw std::invalid_argument("Calling set construtor with non-list mode data");
119
+ }
120
+
121
+ target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
122
+
123
+ const int lgK = data[HllUtil<A>::LG_K_BYTE];
124
+ const bool compact = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
125
+ const bool oooFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
126
+ const bool emptyFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
127
+
128
+ const int couponCount = data[HllUtil<A>::LIST_COUNT_BYTE];
129
+ const int couponsInArray = (compact ? couponCount : (1 << HllUtil<A>::computeLgArrInts(LIST, couponCount, lgK)));
130
+ const size_t expectedLength = HllUtil<A>::LIST_INT_ARR_START + (couponsInArray * sizeof(int));
131
+ if (len < expectedLength) {
132
+ throw std::out_of_range("Byte array too short for sketch. Expected " + std::to_string(expectedLength)
133
+ + ", found: " + std::to_string(len));
134
+ }
135
+
136
+ CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
137
+ sketch->couponCount = couponCount;
138
+ sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
139
+
140
+ if (!emptyFlag) {
141
+ // only need to read valid coupons, unlike in stream case
142
+ std::memcpy(sketch->couponIntArr, data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
143
+ }
144
+
145
+ return sketch;
146
+ }
147
+
148
+ template<typename A>
149
+ CouponList<A>* CouponList<A>::newList(std::istream& is) {
150
+ uint8_t listHeader[8];
151
+ is.read((char*)listHeader, 8 * sizeof(uint8_t));
152
+
153
+ if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::LIST_PREINTS) {
154
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
155
+ }
156
+ if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
157
+ throw std::invalid_argument("Wrong ser ver in input stream");
158
+ }
159
+ if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
160
+ throw std::invalid_argument("Input stream is not an HLL sketch");
161
+ }
162
+
163
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
164
+ if (mode != LIST) {
165
+ throw std::invalid_argument("Calling list construtor with non-list mode data");
166
+ }
167
+
168
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
169
+
170
+ const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE];
171
+ const bool compact = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
172
+ const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
173
+ const bool emptyFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
174
+
175
+ CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
176
+ typedef std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>> coupon_list_ptr;
177
+ coupon_list_ptr ptr(sketch, sketch->get_deleter());
178
+ const int couponCount = listHeader[HllUtil<A>::LIST_COUNT_BYTE];
179
+ sketch->couponCount = couponCount;
180
+ sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
181
+
182
+ if (!emptyFlag) {
183
+ // For stream processing, need to read entire number written to stream so read
184
+ // pointer ends up set correctly.
185
+ // If not compact, still need to read empty items even though in order.
186
+ const int numToRead = (compact ? couponCount : (1 << sketch->lgCouponArrInts));
187
+ is.read((char*)sketch->couponIntArr, numToRead * sizeof(int));
188
+ }
189
+
190
+ if (!is.good())
191
+ throw std::runtime_error("error reading from std::istream");
192
+
193
+ return ptr.release();
194
+ }
195
+
196
+ template<typename A>
197
+ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const {
198
+ const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
199
+ vector_u8<A> byteArr(sketchSizeBytes);
200
+ uint8_t* bytes = byteArr.data() + header_size_bytes;
201
+
202
+ bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
203
+ bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
204
+ bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
205
+ bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
206
+ bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(lgCouponArrInts);
207
+ bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
208
+ bytes[HllUtil<A>::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode == LIST ? couponCount : 0);
209
+ bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
210
+
211
+ if (this->mode == SET) {
212
+ std::memcpy(bytes + HllUtil<A>::HASH_SET_COUNT_INT, &couponCount, sizeof(couponCount));
213
+ }
214
+
215
+ // coupons
216
+ // isCompact() is always false for now
217
+ const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
218
+ switch (sw) {
219
+ case 0: { // src updatable, dst updatable
220
+ std::memcpy(bytes + getMemDataStart(), getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
221
+ break;
222
+ }
223
+ case 1: { // src updatable, dst compact
224
+ bytes += getMemDataStart(); // reusing pointer for incremental writes
225
+ for (uint32_t coupon: *this) {
226
+ std::memcpy(bytes, &coupon, sizeof(coupon));
227
+ bytes += sizeof(coupon);
228
+ }
229
+ break;
230
+ }
231
+
232
+ default:
233
+ throw std::runtime_error("Impossible condition when serializing");
234
+ }
235
+
236
+ return byteArr;
237
+ }
238
+
239
+ template<typename A>
240
+ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
241
+ // header
242
+ const uint8_t preInts(getPreInts());
243
+ os.write((char*)&preInts, sizeof(preInts));
244
+ const uint8_t serialVersion(HllUtil<A>::SER_VER);
245
+ os.write((char*)&serialVersion, sizeof(serialVersion));
246
+ const uint8_t familyId(HllUtil<A>::FAMILY_ID);
247
+ os.write((char*)&familyId, sizeof(familyId));
248
+ const uint8_t lgKByte((uint8_t) this->lgConfigK);
249
+ os.write((char*)&lgKByte, sizeof(lgKByte));
250
+ const uint8_t lgArrIntsByte((uint8_t) lgCouponArrInts);
251
+ os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte));
252
+ const uint8_t flagsByte(this->makeFlagsByte(compact));
253
+ os.write((char*)&flagsByte, sizeof(flagsByte));
254
+
255
+ if (this->mode == LIST) {
256
+ const uint8_t listCount((uint8_t) couponCount);
257
+ os.write((char*)&listCount, sizeof(listCount));
258
+ } else { // mode == SET
259
+ const uint8_t unused(0);
260
+ os.write((char*)&unused, sizeof(unused));
261
+ }
262
+
263
+ const uint8_t modeByte(this->makeModeByte());
264
+ os.write((char*)&modeByte, sizeof(modeByte));
265
+
266
+ if (this->mode == SET) {
267
+ // writing as int, already stored as int
268
+ os.write((char*)&couponCount, sizeof(couponCount));
269
+ }
270
+
271
+ // coupons
272
+ // isCompact() is always false for now
273
+ const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
274
+ switch (sw) {
275
+ case 0: { // src updatable, dst updatable
276
+ os.write((char*)getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
277
+ break;
278
+ }
279
+ case 1: { // src updatable, dst compact
280
+ for (uint32_t coupon: *this) {
281
+ os.write((char*)&coupon, sizeof(coupon));
282
+ }
283
+ break;
284
+ }
285
+
286
+ default:
287
+ throw std::runtime_error("Impossible condition when serializing");
288
+ }
289
+
290
+ return;
291
+ }
292
+
293
+ template<typename A>
294
+ HllSketchImpl<A>* CouponList<A>::couponUpdate(int coupon) {
295
+ const int len = 1 << lgCouponArrInts;
296
+ for (int i = 0; i < len; ++i) { // search for empty slot
297
+ const int couponAtIdx = couponIntArr[i];
298
+ if (couponAtIdx == HllUtil<A>::EMPTY) {
299
+ couponIntArr[i] = coupon; // the actual update
300
+ ++couponCount;
301
+ if (couponCount >= len) { // array full
302
+ if (this->lgConfigK < 8) {
303
+ return promoteHeapListOrSetToHll(*this);
304
+ }
305
+ return promoteHeapListToSet(*this);
306
+ }
307
+ return this;
308
+ }
309
+ // cell not empty
310
+ if (couponAtIdx == coupon) {
311
+ return this; // duplicate
312
+ }
313
+ // cell not empty and not a duplicate, continue
314
+ }
315
+ throw std::runtime_error("Array invalid: no empties and no duplicates");
316
+ }
317
+
318
+ template<typename A>
319
+ double CouponList<A>::getCompositeEstimate() const { return getEstimate(); }
320
+
321
+ template<typename A>
322
+ double CouponList<A>::getEstimate() const {
323
+ const int couponCount = getCouponCount();
324
+ const double est = CubicInterpolation<A>::usingXAndYTables(couponCount);
325
+ return fmax(est, couponCount);
326
+ }
327
+
328
+ template<typename A>
329
+ double CouponList<A>::getLowerBound(const int numStdDev) const {
330
+ HllUtil<A>::checkNumStdDev(numStdDev);
331
+ const int couponCount = getCouponCount();
332
+ const double est = CubicInterpolation<A>::usingXAndYTables(couponCount);
333
+ const double tmp = est / (1.0 + (numStdDev * HllUtil<A>::COUPON_RSE));
334
+ return fmax(tmp, couponCount);
335
+ }
336
+
337
+ template<typename A>
338
+ double CouponList<A>::getUpperBound(const int numStdDev) const {
339
+ HllUtil<A>::checkNumStdDev(numStdDev);
340
+ const int couponCount = getCouponCount();
341
+ const double est = CubicInterpolation<A>::usingXAndYTables(couponCount);
342
+ const double tmp = est / (1.0 - (numStdDev * HllUtil<A>::COUPON_RSE));
343
+ return fmax(tmp, couponCount);
344
+ }
345
+
346
+ template<typename A>
347
+ bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; }
348
+
349
+ template<typename A>
350
+ int CouponList<A>::getUpdatableSerializationBytes() const {
351
+ return getMemDataStart() + (4 << getLgCouponArrInts());
352
+ }
353
+
354
+ template<typename A>
355
+ int CouponList<A>::getCouponCount() const {
356
+ return couponCount;
357
+ }
358
+
359
+ template<typename A>
360
+ int CouponList<A>::getCompactSerializationBytes() const {
361
+ return getMemDataStart() + (couponCount << 2);
362
+ }
363
+
364
+ template<typename A>
365
+ int CouponList<A>::getMemDataStart() const {
366
+ return HllUtil<A>::LIST_INT_ARR_START;
367
+ }
368
+
369
+ template<typename A>
370
+ int CouponList<A>::getPreInts() const {
371
+ return HllUtil<A>::LIST_PREINTS;
372
+ }
373
+
374
+ template<typename A>
375
+ bool CouponList<A>::isCompact() const { return false; }
376
+
377
+ template<typename A>
378
+ bool CouponList<A>::isOutOfOrderFlag() const { return oooFlag; }
379
+
380
+ template<typename A>
381
+ void CouponList<A>::putOutOfOrderFlag(bool oooFlag) {
382
+ this->oooFlag = oooFlag;
383
+ }
384
+
385
+ template<typename A>
386
+ int CouponList<A>::getLgCouponArrInts() const {
387
+ return lgCouponArrInts;
388
+ }
389
+
390
+ template<typename A>
391
+ int* CouponList<A>::getCouponIntArr() const {
392
+ return couponIntArr;
393
+ }
394
+
395
+ template<typename A>
396
+ HllSketchImpl<A>* CouponList<A>::promoteHeapListToSet(CouponList& list) {
397
+ return HllSketchImplFactory<A>::promoteListToSet(list);
398
+ }
399
+
400
+ template<typename A>
401
+ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
402
+ return HllSketchImplFactory<A>::promoteListOrSetToHll(src);
403
+ }
404
+
405
+ template<typename A>
406
+ coupon_iterator<A> CouponList<A>::begin(bool all) const {
407
+ return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 0, all);
408
+ }
409
+
410
+ template<typename A>
411
+ coupon_iterator<A> CouponList<A>::end() const {
412
+ return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 1 << lgCouponArrInts, false);
413
+ }
414
+
415
+ }
416
+
417
+ #endif // _COUPONLIST_INTERNAL_HPP_