datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,47 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+
22
+ #include <reverse_purge_hash_map.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ TEST_CASE("reverse purge hash map: empty", "[frequent_items_sketch]") {
27
+ reverse_purge_hash_map<int> map(3, 3);
28
+ REQUIRE(map.get_num_active() == 0);
29
+ REQUIRE(map.get_lg_cur_size() == 3); // static_cast<uint8_t>(3)
30
+ }
31
+
32
+ TEST_CASE("reverse purge hash map: one item", "[frequent_items_sketch]") {
33
+ reverse_purge_hash_map<int> map(3, 3);
34
+ map.adjust_or_insert(1, 1);
35
+ REQUIRE(map.get_num_active() == 1);
36
+ REQUIRE(map.get(1) == 1);
37
+ }
38
+
39
+ TEST_CASE("reverse purge hash map: iterator", "[frequent_items_sketch]") {
40
+ reverse_purge_hash_map<int> map(3, 4);
41
+ for (int i = 0; i < 11; i++) map.adjust_or_insert(i, 1); // this should fit with no purge
42
+ int sum = 0;
43
+ for (auto &it: map) sum += it.second;
44
+ REQUIRE(sum == 11);
45
+ }
46
+
47
+ } /* namespace datasketches */
@@ -0,0 +1,92 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Header-only HLL sketch library, exposed as an INTERFACE target.
add_library(hll INTERFACE)

add_library(${PROJECT_NAME}::HLL ALIAS hll)

if (BUILD_TESTS)
  add_subdirectory(test)
endif()

target_include_directories(hll
  INTERFACE
    $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
    $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
)

target_link_libraries(hll INTERFACE common)
target_compile_features(hll INTERFACE cxx_std_11)

# Headers installed with the library.
# TODO: would be useful if this didn't need to be reproduced in target_sources(), too
# NOTE(review): HllSketchImplFactory.hpp and hll.private.hpp are shipped in
# include/ but appear in neither this list nor target_sources() - confirm
# whether that omission is intentional.
set(hll_HEADERS
  include/hll.hpp
  include/AuxHashMap.hpp
  include/CompositeInterpolationXTable.hpp
  include/CouponHashSet.hpp
  include/CouponList.hpp
  include/CubicInterpolation.hpp
  include/HarmonicNumbers.hpp
  include/Hll4Array.hpp
  include/Hll6Array.hpp
  include/Hll8Array.hpp
  include/HllArray.hpp
  include/HllSketchImpl.hpp
  include/HllUtil.hpp
  include/coupon_iterator.hpp
  include/RelativeErrorTables.hpp
  include/AuxHashMap-internal.hpp
  include/CompositeInterpolationXTable-internal.hpp
  include/CouponHashSet-internal.hpp
  include/CouponList-internal.hpp
  include/CubicInterpolation-internal.hpp
  include/HarmonicNumbers-internal.hpp
  include/Hll4Array-internal.hpp
  include/Hll6Array-internal.hpp
  include/Hll8Array-internal.hpp
  include/HllArray-internal.hpp
  include/HllSketch-internal.hpp
  include/HllSketchImpl-internal.hpp
  include/HllUnion-internal.hpp
  include/coupon_iterator-internal.hpp
  include/RelativeErrorTables-internal.hpp
)

install(TARGETS hll
  EXPORT ${PROJECT_NAME}
)

install(FILES ${hll_HEADERS}
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

# Same headers again, as absolute paths, attached to the INTERFACE target.
target_sources(hll
  INTERFACE
    ${CMAKE_CURRENT_SOURCE_DIR}/include/hll.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/HllUtil.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketch-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/HllUnion-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables-internal.hpp
    ${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator-internal.hpp
)
@@ -0,0 +1,303 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _AUXHASHMAP_INTERNAL_HPP_
21
+ #define _AUXHASHMAP_INTERNAL_HPP_
22
+
23
+ #include "HllUtil.hpp"
24
+ #include "AuxHashMap.hpp"
25
+
26
+ namespace datasketches {
27
+
28
+ template<typename A>
29
+ AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK)
30
+ : lgConfigK(lgConfigK),
31
+ lgAuxArrInts(lgAuxArrInts),
32
+ auxCount(0) {
33
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
34
+ const int numItems = 1 << lgAuxArrInts;
35
+ auxIntArr = intAlloc().allocate(numItems);
36
+ std::fill(auxIntArr, auxIntArr + numItems, 0);
37
+ }
38
+
39
+ template<typename A>
40
+ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK) {
41
+ return new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK);
42
+ }
43
+
44
+ template<typename A>
45
+ AuxHashMap<A>::AuxHashMap(const AuxHashMap& that)
46
+ : lgConfigK(that.lgConfigK),
47
+ lgAuxArrInts(that.lgAuxArrInts),
48
+ auxCount(that.auxCount) {
49
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
50
+ const int numItems = 1 << lgAuxArrInts;
51
+ auxIntArr = intAlloc().allocate(numItems);
52
+ std::copy(that.auxIntArr, that.auxIntArr + numItems, auxIntArr);
53
+ }
54
+
55
+ template<typename A>
56
+ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
57
+ return new (ahmAlloc().allocate(1)) AuxHashMap<A>(that);
58
+ }
59
+
60
+ template<typename A>
61
+ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
62
+ int lgConfigK,
63
+ int auxCount, int lgAuxArrInts,
64
+ bool srcCompact) {
65
+ int lgArrInts = lgAuxArrInts;
66
+ if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
67
+ lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
68
+ } else { // updatable
69
+ lgArrInts = lgAuxArrInts;
70
+ }
71
+
72
+ int configKmask = (1 << lgConfigK) - 1;
73
+
74
+ AuxHashMap<A>* auxHashMap;
75
+ const int* auxPtr = static_cast<const int*>(bytes);
76
+ if (srcCompact) {
77
+ if (len < auxCount * sizeof(int)) {
78
+ throw std::out_of_range("Input array too small to hold AuxHashMap image");
79
+ }
80
+ auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
81
+ for (int i = 0; i < auxCount; ++i) {
82
+ int pair = auxPtr[i];
83
+ int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
84
+ int value = HllUtil<A>::getValue(pair);
85
+ auxHashMap->mustAdd(slotNo, value);
86
+ }
87
+ } else { // updatable
88
+ int itemsToRead = 1 << lgAuxArrInts;
89
+ if (len < itemsToRead * sizeof(int)) {
90
+ throw std::out_of_range("Input array too small to hold AuxHashMap image");
91
+ }
92
+ auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
93
+ for (int i = 0; i < itemsToRead; ++i) {
94
+ int pair = auxPtr[i];
95
+ if (pair == HllUtil<A>::EMPTY) { continue; }
96
+ int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
97
+ int value = HllUtil<A>::getValue(pair);
98
+ auxHashMap->mustAdd(slotNo, value);
99
+ }
100
+ }
101
+
102
+ if (auxHashMap->getAuxCount() != auxCount) {
103
+ make_deleter()(auxHashMap);
104
+ throw std::invalid_argument("Deserialized AuxHashMap has wrong number of entries");
105
+ }
106
+
107
+ return auxHashMap;
108
+ }
109
+
110
+ template<typename A>
111
+ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
112
+ const int auxCount, const int lgAuxArrInts,
113
+ const bool srcCompact) {
114
+ int lgArrInts = lgAuxArrInts;
115
+ if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
116
+ lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
117
+ } else { // updatable
118
+ lgArrInts = lgAuxArrInts;
119
+ }
120
+
121
+ AuxHashMap<A>* auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
122
+ typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
123
+ aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
124
+
125
+ int configKmask = (1 << lgConfigK) - 1;
126
+
127
+ if (srcCompact) {
128
+ int pair;
129
+ for (int i = 0; i < auxCount; ++i) {
130
+ is.read((char*)&pair, sizeof(pair));
131
+ int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
132
+ int value = HllUtil<A>::getValue(pair);
133
+ auxHashMap->mustAdd(slotNo, value);
134
+ }
135
+ } else { // updatable
136
+ int itemsToRead = 1 << lgAuxArrInts;
137
+ int pair;
138
+ for (int i = 0; i < itemsToRead; ++i) {
139
+ is.read((char*)&pair, sizeof(pair));
140
+ if (pair == HllUtil<A>::EMPTY) { continue; }
141
+ int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
142
+ int value = HllUtil<A>::getValue(pair);
143
+ auxHashMap->mustAdd(slotNo, value);
144
+ }
145
+ }
146
+
147
+ if (auxHashMap->getAuxCount() != auxCount) {
148
+ make_deleter()(auxHashMap);
149
+ throw std::invalid_argument("Deserialized AuxHashMap has wrong number of entries");
150
+ }
151
+
152
+ return aux_ptr.release();
153
+ }
154
+
155
+ template<typename A>
156
+ AuxHashMap<A>::~AuxHashMap<A>() {
157
+ // should be no way to have an object without a valid array
158
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
159
+ intAlloc().deallocate(auxIntArr, 1 << lgAuxArrInts);
160
+ }
161
+
162
+ template<typename A>
163
+ std::function<void(AuxHashMap<A>*)> AuxHashMap<A>::make_deleter() {
164
+ return [](AuxHashMap<A>* ptr) {
165
+ ptr->~AuxHashMap();
166
+ ahmAlloc().deallocate(ptr, 1);
167
+ };
168
+ }
169
+
170
+ template<typename A>
171
+ AuxHashMap<A>* AuxHashMap<A>::copy() const {
172
+ return new (ahmAlloc().allocate(1)) AuxHashMap<A>(*this);
173
+ }
174
+
175
+ template<typename A>
176
+ int AuxHashMap<A>::getAuxCount() const {
177
+ return auxCount;
178
+ }
179
+
180
+ template<typename A>
181
+ int* AuxHashMap<A>::getAuxIntArr(){
182
+ return auxIntArr;
183
+ }
184
+
185
+ template<typename A>
186
+ int AuxHashMap<A>::getLgAuxArrInts() const {
187
+ return lgAuxArrInts;
188
+ }
189
+
190
+ template<typename A>
191
+ int AuxHashMap<A>::getCompactSizeBytes() const {
192
+ return auxCount << 2;
193
+ }
194
+
195
+ template<typename A>
196
+ int AuxHashMap<A>::getUpdatableSizeBytes() const {
197
+ return 4 << lgAuxArrInts;
198
+ }
199
+
200
+ template<typename A>
201
+ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
202
+ const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
203
+ const int entry_pair = HllUtil<A>::pair(slotNo, value);
204
+ if (index >= 0) {
205
+ throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
206
+ + std::to_string(slotNo) + ", Value: " + std::to_string(value));
207
+ }
208
+
209
+ // found empty entry
210
+ auxIntArr[~index] = entry_pair;
211
+ ++auxCount;
212
+ checkGrow();
213
+ }
214
+
215
+ template<typename A>
216
+ int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
217
+ const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
218
+ if (index >= 0) {
219
+ return HllUtil<A>::getValue(auxIntArr[index]);
220
+ }
221
+
222
+ throw std::invalid_argument("slotNo not found: " + std::to_string(slotNo));
223
+ }
224
+
225
+ template<typename A>
226
+ void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
227
+ const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
228
+ if (idx >= 0) {
229
+ auxIntArr[idx] = HllUtil<A>::pair(slotNo, value);
230
+ return;
231
+ }
232
+
233
+ throw std::invalid_argument("Pair not found: SlotNo: " + std::to_string(slotNo)
234
+ + ", Value: " + std::to_string(value));
235
+ }
236
+
237
+ template<typename A>
238
+ void AuxHashMap<A>::checkGrow() {
239
+ if ((HllUtil<A>::RESIZE_DENOM * auxCount) > (HllUtil<A>::RESIZE_NUMER * (1 << lgAuxArrInts))) {
240
+ growAuxSpace();
241
+ }
242
+ }
243
+
244
+ template<typename A>
245
+ void AuxHashMap<A>::growAuxSpace() {
246
+ int* oldArray = auxIntArr;
247
+ const int oldArrLen = 1 << lgAuxArrInts;
248
+ const int configKmask = (1 << lgConfigK) - 1;
249
+ const int newArrLen = 1 << ++lgAuxArrInts;
250
+ typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
251
+ auxIntArr = intAlloc().allocate(newArrLen);
252
+ std::fill(auxIntArr, auxIntArr + newArrLen, 0);
253
+ for (int i = 0; i < oldArrLen; ++i) {
254
+ const int fetched = oldArray[i];
255
+ if (fetched != HllUtil<A>::EMPTY) {
256
+ // find empty in new array
257
+ const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, fetched & configKmask);
258
+ auxIntArr[~idx] = fetched;
259
+ }
260
+ }
261
+
262
+ intAlloc().deallocate(oldArray, oldArrLen);
263
+ }
264
+
265
+ //Searches the Aux arr hash table for an empty or a matching slotNo depending on the context.
266
+ //If entire entry is empty, returns one's complement of index = found empty.
267
+ //If entry contains given slotNo, returns its index = found slotNo.
268
+ //Continues searching.
269
+ //If the probe comes back to original index, throws an exception.
270
+ template<typename A>
271
+ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgConfigK,
272
+ const int slotNo) {
273
+ const int auxArrMask = (1 << lgAuxArrInts) - 1;
274
+ const int configKmask = (1 << lgConfigK) - 1;
275
+ int probe = slotNo & auxArrMask;
276
+ const int loopIndex = probe;
277
+ do {
278
+ const int arrVal = auxArr[probe];
279
+ if (arrVal == HllUtil<A>::EMPTY) { //Compares on entire entry
280
+ return ~probe; //empty
281
+ }
282
+ else if (slotNo == (arrVal & configKmask)) { //Compares only on slotNo
283
+ return probe; //found given slotNo, return probe = index into aux array
284
+ }
285
+ const int stride = (slotNo >> lgAuxArrInts) | 1;
286
+ probe = (probe + stride) & auxArrMask;
287
+ } while (probe != loopIndex);
288
+ throw std::runtime_error("Key not found and no empty slots!");
289
+ }
290
+
291
+ template<typename A>
292
+ coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
293
+ return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 0, all);
294
+ }
295
+
296
+ template<typename A>
297
+ coupon_iterator<A> AuxHashMap<A>::end() const {
298
+ return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
299
+ }
300
+
301
+ }
302
+
303
+ #endif // _AUXHASHMAP_INTERNAL_HPP_