datasketches 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,40 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _COMPOSITEINTERPOLATIONXTABLE_HPP_
21
+ #define _COMPOSITEINTERPOLATIONXTABLE_HPP_
22
+
23
+ #include <memory>
24
+
25
+ namespace datasketches {
26
+
27
/**
 * Static accessors for the composite-interpolation X table.
 *
 * Only the declarations live here; the definitions are supplied by
 * CompositeInterpolationXTable-internal.hpp, which this header includes.
 *
 * @tparam A allocator type; defaults to std::allocator<char>.
 */
template<class A = std::allocator<char>>
class CompositeInterpolationXTable {
public:
  /// Y stride for the given logK (NOTE(review): presumably log2 of the sketch's K -- confirm).
  static int get_y_stride(int logK);

  /// Pointer to the X array for the given logK.
  static const double* get_x_arr(int logK);

  /// Length of the X array.
  static int get_x_arr_length();
};
35
+
36
+ }
37
+
38
+ #include "CompositeInterpolationXTable-internal.hpp"
39
+
40
+ #endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
@@ -0,0 +1,291 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _COUPONHASHSET_INTERNAL_HPP_
21
+ #define _COUPONHASHSET_INTERNAL_HPP_
22
+
23
+ #include "CouponHashSet.hpp"
24
+
25
+ #include <cstring>
26
+ #include <exception>
27
+
28
+ namespace datasketches {
29
+
30
// Forward declaration of the open-addressing probe helper used by the
// CouponHashSet members below; the definition appears later in this header.
// Returns the slot index when the coupon is present, or the bitwise
// complement (~index) of the empty slot where it would be inserted.
template<typename A>
static int find(const int* array, int lgArrInts, int coupon);
32
+
33
+ template<typename A>
34
+ CouponHashSet<A>::CouponHashSet(const int lgConfigK, const target_hll_type tgtHllType)
35
+ : CouponList<A>(lgConfigK, tgtHllType, hll_mode::SET)
36
+ {
37
+ if (lgConfigK <= 7) {
38
+ throw std::invalid_argument("CouponHashSet must be initialized with lgConfigK > 7. Found: "
39
+ + std::to_string(lgConfigK));
40
+ }
41
+ }
42
+
43
+ template<typename A>
44
+ CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that)
45
+ : CouponList<A>(that) {}
46
+
47
+ template<typename A>
48
+ CouponHashSet<A>::CouponHashSet(const CouponHashSet<A>& that, const target_hll_type tgtHllType)
49
+ : CouponList<A>(that, tgtHllType) {}
50
+
51
+ template<typename A>
52
+ CouponHashSet<A>::~CouponHashSet() {}
53
+
54
+ template<typename A>
55
+ std::function<void(HllSketchImpl<A>*)> CouponHashSet<A>::get_deleter() const {
56
+ return [](HllSketchImpl<A>* ptr) {
57
+ CouponHashSet<A>* chs = static_cast<CouponHashSet<A>*>(ptr);
58
+ chs->~CouponHashSet();
59
+ chsAlloc().deallocate(chs, 1);
60
+ };
61
+ }
62
+
63
+ template<typename A>
64
+ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len) {
65
+ if (len < HllUtil<A>::HASH_SET_INT_ARR_START) { // hard-coded
66
+ throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
67
+ }
68
+
69
+ const uint8_t* data = static_cast<const uint8_t*>(bytes);
70
+ if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HASH_SET_PREINTS) {
71
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
72
+ }
73
+ if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
74
+ throw std::invalid_argument("Wrong ser ver in input stream");
75
+ }
76
+ if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
77
+ throw std::invalid_argument("Input stream is not an HLL sketch");
78
+ }
79
+
80
+ const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
81
+ if (mode != SET) {
82
+ throw std::invalid_argument("Calling set construtor with non-set mode data");
83
+ }
84
+
85
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
86
+
87
+ const int lgK = data[HllUtil<A>::LG_K_BYTE];
88
+ if (lgK <= 7) {
89
+ throw std::invalid_argument("Attempt to deserialize invalid CouponHashSet with lgConfigK <= 7. Found: "
90
+ + std::to_string(lgK));
91
+ }
92
+ int lgArrInts = data[HllUtil<A>::LG_ARR_BYTE];
93
+ const bool compactFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
94
+
95
+ int couponCount;
96
+ std::memcpy(&couponCount, data + HllUtil<A>::HASH_SET_COUNT_INT, sizeof(couponCount));
97
+ if (lgArrInts < HllUtil<A>::LG_INIT_SET_SIZE) {
98
+ lgArrInts = HllUtil<A>::computeLgArrInts(SET, couponCount, lgK);
99
+ }
100
+ // Don't set couponCount in sketch here;
101
+ // we'll set later if updatable, and increment with updates if compact
102
+ const int couponsInArray = (compactFlag ? couponCount : (1 << lgArrInts));
103
+ const size_t expectedLength = HllUtil<A>::HASH_SET_INT_ARR_START + (couponsInArray * sizeof(int));
104
+ if (len < expectedLength) {
105
+ throw std::out_of_range("Byte array too short for sketch. Expected " + std::to_string(expectedLength)
106
+ + ", found: " + std::to_string(len));
107
+ }
108
+
109
+ CouponHashSet<A>* sketch = new (chsAlloc().allocate(1)) CouponHashSet<A>(lgK, tgtHllType);
110
+
111
+ if (compactFlag) {
112
+ const uint8_t* curPos = data + HllUtil<A>::HASH_SET_INT_ARR_START;
113
+ int coupon;
114
+ for (int i = 0; i < couponCount; ++i, curPos += sizeof(coupon)) {
115
+ std::memcpy(&coupon, curPos, sizeof(coupon));
116
+ sketch->couponUpdate(coupon);
117
+ }
118
+ } else {
119
+ int* oldArr = sketch->couponIntArr;
120
+ const size_t oldArrLen = 1 << sketch->lgCouponArrInts;
121
+ sketch->lgCouponArrInts = lgArrInts;
122
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
123
+ sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
124
+ sketch->couponCount = couponCount;
125
+ // only need to read valid coupons, unlike in stream case
126
+ std::memcpy(sketch->couponIntArr,
127
+ data + HllUtil<A>::HASH_SET_INT_ARR_START,
128
+ couponCount * sizeof(int));
129
+ intAlloc().deallocate(oldArr, oldArrLen);
130
+ }
131
+
132
+ return sketch;
133
+ }
134
+
135
+ template<typename A>
136
+ CouponHashSet<A>* CouponHashSet<A>::newSet(std::istream& is) {
137
+ uint8_t listHeader[8];
138
+ is.read((char*)listHeader, 8 * sizeof(uint8_t));
139
+
140
+ if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::HASH_SET_PREINTS) {
141
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
142
+ }
143
+ if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
144
+ throw std::invalid_argument("Wrong ser ver in input stream");
145
+ }
146
+ if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
147
+ throw std::invalid_argument("Input stream is not an HLL sketch");
148
+ }
149
+
150
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
151
+ if (mode != SET) {
152
+ throw std::invalid_argument("Calling set construtor with non-set mode data");
153
+ }
154
+
155
+ target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
156
+
157
+ const int lgK = listHeader[HllUtil<A>::LG_K_BYTE];
158
+ if (lgK <= 7) {
159
+ throw std::invalid_argument("Attempt to deserialize invalid CouponHashSet with lgConfigK <= 7. Found: "
160
+ + std::to_string(lgK));
161
+ }
162
+ int lgArrInts = listHeader[HllUtil<A>::LG_ARR_BYTE];
163
+ const bool compactFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
164
+
165
+ int couponCount;
166
+ is.read((char*)&couponCount, sizeof(couponCount));
167
+ if (lgArrInts < HllUtil<A>::LG_INIT_SET_SIZE) {
168
+ lgArrInts = HllUtil<A>::computeLgArrInts(SET, couponCount, lgK);
169
+ }
170
+
171
+ CouponHashSet<A>* sketch = new (chsAlloc().allocate(1)) CouponHashSet<A>(lgK, tgtHllType);
172
+ typedef std::unique_ptr<CouponHashSet<A>, std::function<void(HllSketchImpl<A>*)>> coupon_hash_set_ptr;
173
+ coupon_hash_set_ptr ptr(sketch, sketch->get_deleter());
174
+
175
+ // Don't set couponCount here;
176
+ // we'll set later if updatable, and increment with updates if compact
177
+ if (compactFlag) {
178
+ for (int i = 0; i < couponCount; ++i) {
179
+ int coupon;
180
+ is.read((char*)&coupon, sizeof(coupon));
181
+ sketch->couponUpdate(coupon);
182
+ }
183
+ } else {
184
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
185
+ intAlloc().deallocate(sketch->couponIntArr, 1 << sketch->lgCouponArrInts);
186
+ sketch->lgCouponArrInts = lgArrInts;
187
+ sketch->couponIntArr = intAlloc().allocate(1 << lgArrInts);
188
+ sketch->couponCount = couponCount;
189
+ // for stream processing, read entire list so read pointer ends up set correctly
190
+ is.read((char*)sketch->couponIntArr, (1 << sketch->lgCouponArrInts) * sizeof(int));
191
+ }
192
+
193
+ if (!is.good())
194
+ throw std::runtime_error("error reading from std::istream");
195
+
196
+ return ptr.release();
197
+ }
198
+
199
+ template<typename A>
200
+ CouponHashSet<A>* CouponHashSet<A>::copy() const {
201
+ return new (chsAlloc().allocate(1)) CouponHashSet<A>(*this);
202
+ }
203
+
204
+ template<typename A>
205
+ CouponHashSet<A>* CouponHashSet<A>::copyAs(const target_hll_type tgtHllType) const {
206
+ return new (chsAlloc().allocate(1)) CouponHashSet<A>(*this, tgtHllType);
207
+ }
208
+
209
+ template<typename A>
210
+ HllSketchImpl<A>* CouponHashSet<A>::couponUpdate(int coupon) {
211
+ const int index = find<A>(this->couponIntArr, this->lgCouponArrInts, coupon);
212
+ if (index >= 0) {
213
+ return this; // found duplicate, ignore
214
+ }
215
+ this->couponIntArr[~index] = coupon; // found empty
216
+ ++this->couponCount;
217
+ if (checkGrowOrPromote()) {
218
+ return this->promoteHeapListOrSetToHll(*this);
219
+ }
220
+ return this;
221
+ }
222
+
223
+ template<typename A>
224
+ int CouponHashSet<A>::getMemDataStart() const {
225
+ return HllUtil<A>::HASH_SET_INT_ARR_START;
226
+ }
227
+
228
+ template<typename A>
229
+ int CouponHashSet<A>::getPreInts() const {
230
+ return HllUtil<A>::HASH_SET_PREINTS;
231
+ }
232
+
233
+ template<typename A>
234
+ bool CouponHashSet<A>::checkGrowOrPromote() {
235
+ if ((HllUtil<A>::RESIZE_DENOM * this->couponCount) > (HllUtil<A>::RESIZE_NUMER * (1 << this->lgCouponArrInts))) {
236
+ if (this->lgCouponArrInts == (this->lgConfigK - 3)) { // at max size
237
+ return true; // promote to HLL
238
+ }
239
+ int tgtLgCoupArrSize = this->lgCouponArrInts + 1;
240
+ growHashSet(this->lgCouponArrInts, tgtLgCoupArrSize);
241
+ }
242
+ return false;
243
+ }
244
+
245
+ template<typename A>
246
+ void CouponHashSet<A>::growHashSet(const int srcLgCoupArrSize, const int tgtLgCoupArrSize) {
247
+ const int tgtLen = 1 << tgtLgCoupArrSize;
248
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
249
+ int* tgtCouponIntArr = intAlloc().allocate(tgtLen);
250
+ std::fill(tgtCouponIntArr, tgtCouponIntArr + tgtLen, 0);
251
+
252
+ const int srcLen = 1 << srcLgCoupArrSize;
253
+ for (int i = 0; i < srcLen; ++i) { // scan existing array for non-zero values
254
+ const int fetched = this->couponIntArr[i];
255
+ if (fetched != HllUtil<A>::EMPTY) {
256
+ const int idx = find<A>(tgtCouponIntArr, tgtLgCoupArrSize, fetched); // search TGT array
257
+ if (idx < 0) { // found EMPTY
258
+ tgtCouponIntArr[~idx] = fetched; // insert
259
+ continue;
260
+ }
261
+ throw std::runtime_error("Error: Found duplicate coupon");
262
+ }
263
+ }
264
+
265
+ intAlloc().deallocate(this->couponIntArr, 1 << this->lgCouponArrInts);
266
+ this->couponIntArr = tgtCouponIntArr;
267
+ this->lgCouponArrInts = tgtLgCoupArrSize;
268
+ }
269
+
270
+ template<typename A>
271
+ static int find(const int* array, const int lgArrInts, const int coupon) {
272
+ const int arrMask = (1 << lgArrInts) - 1;
273
+ int probe = coupon & arrMask;
274
+ const int loopIndex = probe;
275
+ do {
276
+ const int couponAtIdx = array[probe];
277
+ if (couponAtIdx == HllUtil<A>::EMPTY) {
278
+ return ~probe; //empty
279
+ }
280
+ else if (coupon == couponAtIdx) {
281
+ return probe; //duplicate
282
+ }
283
+ const int stride = ((coupon & HllUtil<A>::KEY_MASK_26) >> lgArrInts) | 1;
284
+ probe = (probe + stride) & arrMask;
285
+ } while (probe != loopIndex);
286
+ throw std::invalid_argument("Key not found and no empty slots!");
287
+ }
288
+
289
+ }
290
+
291
+ #endif // _COUPONHASHSET_INTERNAL_HPP_
@@ -0,0 +1,59 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _COUPONHASHSET_HPP_
21
+ #define _COUPONHASHSET_HPP_
22
+
23
+ #include "CouponList.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ template<typename A = std::allocator<char>>
28
+ class CouponHashSet : public CouponList<A> {
29
+ public:
30
+ static CouponHashSet* newSet(const void* bytes, size_t len);
31
+ static CouponHashSet* newSet(std::istream& is);
32
+ explicit CouponHashSet(int lgConfigK, target_hll_type tgtHllType);
33
+ explicit CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
34
+ explicit CouponHashSet(const CouponHashSet& that);
35
+
36
+ virtual ~CouponHashSet();
37
+ virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
38
+
39
+ protected:
40
+
41
+ virtual CouponHashSet* copy() const;
42
+ virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const;
43
+
44
+ virtual HllSketchImpl<A>* couponUpdate(int coupon);
45
+
46
+ virtual int getMemDataStart() const;
47
+ virtual int getPreInts() const;
48
+
49
+ friend class HllSketchImplFactory<A>;
50
+
51
+ private:
52
+ typedef typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>> chsAlloc;
53
+ bool checkGrowOrPromote();
54
+ void growHashSet(int srcLgCoupArrSize, int tgtLgCoupArrSize);
55
+ };
56
+
57
+ }
58
+
59
+ #endif /* _COUPONHASHSET_HPP_ */
@@ -0,0 +1,417 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _COUPONLIST_INTERNAL_HPP_
21
+ #define _COUPONLIST_INTERNAL_HPP_
22
+
23
+ #include "CouponList.hpp"
24
+ #include "CubicInterpolation.hpp"
25
+ #include "HllUtil.hpp"
26
+
27
+ #include <algorithm>
28
+ #include <cmath>
29
+
30
+ namespace datasketches {
31
+
32
+ template<typename A>
33
+ CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode)
34
+ : HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false) {
35
+ if (mode == hll_mode::LIST) {
36
+ lgCouponArrInts = HllUtil<A>::LG_INIT_LIST_SIZE;
37
+ } else { // mode == SET
38
+ lgCouponArrInts = HllUtil<A>::LG_INIT_SET_SIZE;
39
+ }
40
+ oooFlag = false;
41
+ const int arrayLen = 1 << lgCouponArrInts;
42
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
43
+ couponIntArr = intAlloc().allocate(arrayLen);
44
+ std::fill(couponIntArr, couponIntArr + arrayLen, 0);
45
+ couponCount = 0;
46
+ }
47
+
48
+ template<typename A>
49
+ CouponList<A>::CouponList(const CouponList& that)
50
+ : HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, that.mode, false),
51
+ lgCouponArrInts(that.lgCouponArrInts),
52
+ couponCount(that.couponCount),
53
+ oooFlag(that.oooFlag) {
54
+
55
+ const int numItems = 1 << lgCouponArrInts;
56
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
57
+ couponIntArr = intAlloc().allocate(numItems);
58
+ std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
59
+ }
60
+
61
+ template<typename A>
62
+ CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType)
63
+ : HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
64
+ lgCouponArrInts(that.lgCouponArrInts),
65
+ couponCount(that.couponCount),
66
+ oooFlag(that.oooFlag) {
67
+
68
+ const int numItems = 1 << lgCouponArrInts;
69
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
70
+ couponIntArr = intAlloc().allocate(numItems);
71
+ std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
72
+ }
73
+
74
+ template<typename A>
75
+ CouponList<A>::~CouponList() {
76
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
77
+ intAlloc().deallocate(couponIntArr, 1 << lgCouponArrInts);
78
+ }
79
+
80
+ template<typename A>
81
+ std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const {
82
+ return [](HllSketchImpl<A>* ptr) {
83
+ CouponList<A>* cl = static_cast<CouponList<A>*>(ptr);
84
+ cl->~CouponList();
85
+ clAlloc().deallocate(cl, 1);
86
+ };
87
+ }
88
+
89
+ template<typename A>
90
+ CouponList<A>* CouponList<A>::copy() const {
91
+ return new (clAlloc().allocate(1)) CouponList<A>(*this);
92
+ }
93
+
94
+ template<typename A>
95
+ CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const {
96
+ return new (clAlloc().allocate(1)) CouponList<A>(*this, tgtHllType);
97
+ }
98
+
99
+ template<typename A>
100
+ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
101
+ if (len < HllUtil<A>::LIST_INT_ARR_START) {
102
+ throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
103
+ }
104
+
105
+ const uint8_t* data = static_cast<const uint8_t*>(bytes);
106
+ if (data[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::LIST_PREINTS) {
107
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
108
+ }
109
+ if (data[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
110
+ throw std::invalid_argument("Wrong ser ver in input stream");
111
+ }
112
+ if (data[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
113
+ throw std::invalid_argument("Input stream is not an HLL sketch");
114
+ }
115
+
116
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
117
+ if (mode != LIST) {
118
+ throw std::invalid_argument("Calling set construtor with non-list mode data");
119
+ }
120
+
121
+ target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
122
+
123
+ const int lgK = data[HllUtil<A>::LG_K_BYTE];
124
+ const bool compact = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
125
+ const bool oooFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
126
+ const bool emptyFlag = ((data[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
127
+
128
+ const int couponCount = data[HllUtil<A>::LIST_COUNT_BYTE];
129
+ const int couponsInArray = (compact ? couponCount : (1 << HllUtil<A>::computeLgArrInts(LIST, couponCount, lgK)));
130
+ const size_t expectedLength = HllUtil<A>::LIST_INT_ARR_START + (couponsInArray * sizeof(int));
131
+ if (len < expectedLength) {
132
+ throw std::out_of_range("Byte array too short for sketch. Expected " + std::to_string(expectedLength)
133
+ + ", found: " + std::to_string(len));
134
+ }
135
+
136
+ CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
137
+ sketch->couponCount = couponCount;
138
+ sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
139
+
140
+ if (!emptyFlag) {
141
+ // only need to read valid coupons, unlike in stream case
142
+ std::memcpy(sketch->couponIntArr, data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
143
+ }
144
+
145
+ return sketch;
146
+ }
147
+
148
+ template<typename A>
149
+ CouponList<A>* CouponList<A>::newList(std::istream& is) {
150
+ uint8_t listHeader[8];
151
+ is.read((char*)listHeader, 8 * sizeof(uint8_t));
152
+
153
+ if (listHeader[HllUtil<A>::PREAMBLE_INTS_BYTE] != HllUtil<A>::LIST_PREINTS) {
154
+ throw std::invalid_argument("Incorrect number of preInts in input stream");
155
+ }
156
+ if (listHeader[HllUtil<A>::SER_VER_BYTE] != HllUtil<A>::SER_VER) {
157
+ throw std::invalid_argument("Wrong ser ver in input stream");
158
+ }
159
+ if (listHeader[HllUtil<A>::FAMILY_BYTE] != HllUtil<A>::FAMILY_ID) {
160
+ throw std::invalid_argument("Input stream is not an HLL sketch");
161
+ }
162
+
163
+ hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
164
+ if (mode != LIST) {
165
+ throw std::invalid_argument("Calling list construtor with non-list mode data");
166
+ }
167
+
168
+ const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
169
+
170
+ const int lgK = (int) listHeader[HllUtil<A>::LG_K_BYTE];
171
+ const bool compact = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::COMPACT_FLAG_MASK) ? true : false);
172
+ const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
173
+ const bool emptyFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
174
+
175
+ CouponList<A>* sketch = new (clAlloc().allocate(1)) CouponList<A>(lgK, tgtHllType, mode);
176
+ typedef std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>> coupon_list_ptr;
177
+ coupon_list_ptr ptr(sketch, sketch->get_deleter());
178
+ const int couponCount = listHeader[HllUtil<A>::LIST_COUNT_BYTE];
179
+ sketch->couponCount = couponCount;
180
+ sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
181
+
182
+ if (!emptyFlag) {
183
+ // For stream processing, need to read entire number written to stream so read
184
+ // pointer ends up set correctly.
185
+ // If not compact, still need to read empty items even though in order.
186
+ const int numToRead = (compact ? couponCount : (1 << sketch->lgCouponArrInts));
187
+ is.read((char*)sketch->couponIntArr, numToRead * sizeof(int));
188
+ }
189
+
190
+ if (!is.good())
191
+ throw std::runtime_error("error reading from std::istream");
192
+
193
+ return ptr.release();
194
+ }
195
+
196
+ template<typename A>
197
+ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const {
198
+ const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
199
+ vector_u8<A> byteArr(sketchSizeBytes);
200
+ uint8_t* bytes = byteArr.data() + header_size_bytes;
201
+
202
+ bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
203
+ bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
204
+ bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
205
+ bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
206
+ bytes[HllUtil<A>::LG_ARR_BYTE] = static_cast<uint8_t>(lgCouponArrInts);
207
+ bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
208
+ bytes[HllUtil<A>::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode == LIST ? couponCount : 0);
209
+ bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
210
+
211
+ if (this->mode == SET) {
212
+ std::memcpy(bytes + HllUtil<A>::HASH_SET_COUNT_INT, &couponCount, sizeof(couponCount));
213
+ }
214
+
215
+ // coupons
216
+ // isCompact() is always false for now
217
+ const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
218
+ switch (sw) {
219
+ case 0: { // src updatable, dst updatable
220
+ std::memcpy(bytes + getMemDataStart(), getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
221
+ break;
222
+ }
223
+ case 1: { // src updatable, dst compact
224
+ bytes += getMemDataStart(); // reusing pointer for incremental writes
225
+ for (uint32_t coupon: *this) {
226
+ std::memcpy(bytes, &coupon, sizeof(coupon));
227
+ bytes += sizeof(coupon);
228
+ }
229
+ break;
230
+ }
231
+
232
+ default:
233
+ throw std::runtime_error("Impossible condition when serializing");
234
+ }
235
+
236
+ return byteArr;
237
+ }
238
+
239
+ template<typename A>
240
+ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
241
+ // header
242
+ const uint8_t preInts(getPreInts());
243
+ os.write((char*)&preInts, sizeof(preInts));
244
+ const uint8_t serialVersion(HllUtil<A>::SER_VER);
245
+ os.write((char*)&serialVersion, sizeof(serialVersion));
246
+ const uint8_t familyId(HllUtil<A>::FAMILY_ID);
247
+ os.write((char*)&familyId, sizeof(familyId));
248
+ const uint8_t lgKByte((uint8_t) this->lgConfigK);
249
+ os.write((char*)&lgKByte, sizeof(lgKByte));
250
+ const uint8_t lgArrIntsByte((uint8_t) lgCouponArrInts);
251
+ os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte));
252
+ const uint8_t flagsByte(this->makeFlagsByte(compact));
253
+ os.write((char*)&flagsByte, sizeof(flagsByte));
254
+
255
+ if (this->mode == LIST) {
256
+ const uint8_t listCount((uint8_t) couponCount);
257
+ os.write((char*)&listCount, sizeof(listCount));
258
+ } else { // mode == SET
259
+ const uint8_t unused(0);
260
+ os.write((char*)&unused, sizeof(unused));
261
+ }
262
+
263
+ const uint8_t modeByte(this->makeModeByte());
264
+ os.write((char*)&modeByte, sizeof(modeByte));
265
+
266
+ if (this->mode == SET) {
267
+ // writing as int, already stored as int
268
+ os.write((char*)&couponCount, sizeof(couponCount));
269
+ }
270
+
271
+ // coupons
272
+ // isCompact() is always false for now
273
+ const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
274
+ switch (sw) {
275
+ case 0: { // src updatable, dst updatable
276
+ os.write((char*)getCouponIntArr(), (1 << lgCouponArrInts) * sizeof(int));
277
+ break;
278
+ }
279
+ case 1: { // src updatable, dst compact
280
+ for (uint32_t coupon: *this) {
281
+ os.write((char*)&coupon, sizeof(coupon));
282
+ }
283
+ break;
284
+ }
285
+
286
+ default:
287
+ throw std::runtime_error("Impossible condition when serializing");
288
+ }
289
+
290
+ return;
291
+ }
292
+
293
+ template<typename A>
294
+ HllSketchImpl<A>* CouponList<A>::couponUpdate(int coupon) {
295
+ const int len = 1 << lgCouponArrInts;
296
+ for (int i = 0; i < len; ++i) { // search for empty slot
297
+ const int couponAtIdx = couponIntArr[i];
298
+ if (couponAtIdx == HllUtil<A>::EMPTY) {
299
+ couponIntArr[i] = coupon; // the actual update
300
+ ++couponCount;
301
+ if (couponCount >= len) { // array full
302
+ if (this->lgConfigK < 8) {
303
+ return promoteHeapListOrSetToHll(*this);
304
+ }
305
+ return promoteHeapListToSet(*this);
306
+ }
307
+ return this;
308
+ }
309
+ // cell not empty
310
+ if (couponAtIdx == coupon) {
311
+ return this; // duplicate
312
+ }
313
+ // cell not empty and not a duplicate, continue
314
+ }
315
+ throw std::runtime_error("Array invalid: no empties and no duplicates");
316
+ }
317
+
318
+ template<typename A>
319
+ double CouponList<A>::getCompositeEstimate() const { return getEstimate(); }
320
+
321
+ template<typename A>
322
+ double CouponList<A>::getEstimate() const {
323
+ const int couponCount = getCouponCount();
324
+ const double est = CubicInterpolation<A>::usingXAndYTables(couponCount);
325
+ return fmax(est, couponCount);
326
+ }
327
+
328
+ template<typename A>
329
+ double CouponList<A>::getLowerBound(const int numStdDev) const {
330
+ HllUtil<A>::checkNumStdDev(numStdDev);
331
+ const int couponCount = getCouponCount();
332
+ const double est = CubicInterpolation<A>::usingXAndYTables(couponCount);
333
+ const double tmp = est / (1.0 + (numStdDev * HllUtil<A>::COUPON_RSE));
334
+ return fmax(tmp, couponCount);
335
+ }
336
+
337
+ template<typename A>
338
+ double CouponList<A>::getUpperBound(const int numStdDev) const {
339
+ HllUtil<A>::checkNumStdDev(numStdDev);
340
+ const int couponCount = getCouponCount();
341
+ const double est = CubicInterpolation<A>::usingXAndYTables(couponCount);
342
+ const double tmp = est / (1.0 - (numStdDev * HllUtil<A>::COUPON_RSE));
343
+ return fmax(tmp, couponCount);
344
+ }
345
+
346
+ template<typename A>
347
+ bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; }
348
+
349
+ template<typename A>
350
+ int CouponList<A>::getUpdatableSerializationBytes() const {
351
+ return getMemDataStart() + (4 << getLgCouponArrInts());
352
+ }
353
+
354
+ template<typename A>
355
+ int CouponList<A>::getCouponCount() const {
356
+ return couponCount;
357
+ }
358
+
359
+ template<typename A>
360
+ int CouponList<A>::getCompactSerializationBytes() const {
361
+ return getMemDataStart() + (couponCount << 2);
362
+ }
363
+
364
+ template<typename A>
365
+ int CouponList<A>::getMemDataStart() const {
366
+ return HllUtil<A>::LIST_INT_ARR_START;
367
+ }
368
+
369
+ template<typename A>
370
+ int CouponList<A>::getPreInts() const {
371
+ return HllUtil<A>::LIST_PREINTS;
372
+ }
373
+
374
+ template<typename A>
375
+ bool CouponList<A>::isCompact() const { return false; }
376
+
377
+ template<typename A>
378
+ bool CouponList<A>::isOutOfOrderFlag() const { return oooFlag; }
379
+
380
+ template<typename A>
381
+ void CouponList<A>::putOutOfOrderFlag(bool oooFlag) {
382
+ this->oooFlag = oooFlag;
383
+ }
384
+
385
+ template<typename A>
386
+ int CouponList<A>::getLgCouponArrInts() const {
387
+ return lgCouponArrInts;
388
+ }
389
+
390
+ template<typename A>
391
+ int* CouponList<A>::getCouponIntArr() const {
392
+ return couponIntArr;
393
+ }
394
+
395
+ template<typename A>
396
+ HllSketchImpl<A>* CouponList<A>::promoteHeapListToSet(CouponList& list) {
397
+ return HllSketchImplFactory<A>::promoteListToSet(list);
398
+ }
399
+
400
+ template<typename A>
401
+ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
402
+ return HllSketchImplFactory<A>::promoteListOrSetToHll(src);
403
+ }
404
+
405
+ template<typename A>
406
+ coupon_iterator<A> CouponList<A>::begin(bool all) const {
407
+ return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 0, all);
408
+ }
409
+
410
+ template<typename A>
411
+ coupon_iterator<A> CouponList<A>::end() const {
412
+ return coupon_iterator<A>(couponIntArr, 1 << lgCouponArrInts, 1 << lgCouponArrInts, false);
413
+ }
414
+
415
+ }
416
+
417
+ #endif // _COUPONLIST_INTERNAL_HPP_