datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,32 @@
1
+ #ifndef _HLL_PRIVATE_HPP_
2
+ #define _HLL_PRIVATE_HPP_
3
+
4
+ #include "AuxHashMap.hpp"
5
+ #include "CompositeInterpolationXTable.hpp"
6
+ #include "CouponHashSet.hpp"
7
+ #include "CouponList.hpp"
8
+ #include "CubicInterpolation.hpp"
9
+ #include "HarmonicNumbers.hpp"
10
+ #include "Hll4Array.hpp"
11
+ #include "Hll6Array.hpp"
12
+ #include "Hll8Array.hpp"
13
+ #include "HllArray.hpp"
14
+ #include "HllSketchImpl.hpp"
15
+ #include "HllSketchImplFactory.hpp"
16
+ #include "HllUtil.hpp"
17
+ #include "RelativeErrorTables.hpp"
18
+
19
+ #include "AuxHashMap-internal.hpp"
20
+ #include "coupon_iterator.hpp"
21
+ #include "CouponHashSet-internal.hpp"
22
+ #include "CouponList-internal.hpp"
23
+ #include "Hll4Array-internal.hpp"
24
+ #include "Hll6Array-internal.hpp"
25
+ #include "Hll8Array-internal.hpp"
26
+ #include "HllArray-internal.hpp"
27
+ #include "HllSketch-internal.hpp"
28
+ #include "HllSketchImpl-internal.hpp"
29
+ #include "HllUnion-internal.hpp"
30
+ #include "coupon_iterator-internal.hpp"
31
+
32
+ #endif // _HLL_PRIVATE_HPP_
@@ -0,0 +1,79 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+ #include <memory>
22
+
23
+ #include "AuxHashMap.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ TEST_CASE("aux hash map: check must replace", "[aux_hash_map]") {
28
+ AuxHashMap<>* map = new AuxHashMap<>(3, 7);
29
+ map->mustAdd(100, 5);
30
+ int val = map->mustFindValueFor(100);
31
+ REQUIRE(val == 5);
32
+
33
+ map->mustReplace(100, 10);
34
+ val = map->mustFindValueFor(100);
35
+ REQUIRE(val == 10);
36
+
37
+ REQUIRE_THROWS_AS(map->mustReplace(101, 5), std::invalid_argument);
38
+
39
+ delete map;
40
+ }
41
+
42
+ TEST_CASE("aux hash map: check grow space", "[aux_hash_map]") {
43
+ auto map = std::unique_ptr<AuxHashMap<>, std::function<void(AuxHashMap<>*)>>(
44
+ AuxHashMap<>::newAuxHashMap(3, 7),
45
+ AuxHashMap<>::make_deleter()
46
+ );
47
+ REQUIRE(map->getLgAuxArrInts() == 3);
48
+ for (int i = 1; i <= 7; ++i) {
49
+ map->mustAdd(i, i);
50
+ }
51
+ REQUIRE(map->getLgAuxArrInts() == 4);
52
+ auto itr = map->begin(true);
53
+ int count1 = 0;
54
+ int count2 = 0;
55
+ while (itr != map->end()) {
56
+ ++count2;
57
+ int pair = *itr;
58
+ if (pair != 0) { ++count1; }
59
+ ++itr;
60
+ }
61
+ REQUIRE(count1 == 7);
62
+ REQUIRE(count2 == 16);
63
+ }
64
+
65
+ TEST_CASE("aux hash map: check exception must find value for", "[aux_hash_map]") {
66
+ AuxHashMap<> map(3, 7);
67
+ map.mustAdd(100, 5);
68
+ REQUIRE_THROWS_AS(map.mustFindValueFor(101), std::invalid_argument);
69
+ }
70
+
71
+ TEST_CASE("aux hash map: check exception must add", "[aux_hash_map]") {
72
+ AuxHashMap<>* map = AuxHashMap<>::newAuxHashMap(3, 7);
73
+ map->mustAdd(100, 5);
74
+ REQUIRE_THROWS_AS(map->mustAdd(100, 6), std::invalid_argument);
75
+
76
+ AuxHashMap<>::make_deleter()(map);
77
+ }
78
+
79
+ } /* namespace datasketches */
@@ -0,0 +1,51 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
# Test executable for the hll module; its sources are attached right below.
add_executable(hll_test)

# Test translation units, kept in their original listing order.
target_sources(hll_test
  PRIVATE
    AuxHashMapTest.cpp
    CouponHashSetTest.cpp
    CouponListTest.cpp
    CrossCountingTest.cpp
    HllArrayTest.cpp
    HllSketchTest.cpp
    HllUnionTest.cpp
    TablesTest.cpp
    ToFromByteArrayTest.cpp
    IsomorphicTest.cpp
)

target_link_libraries(hll_test hll common_test)

set_target_properties(hll_test PROPERTIES
  CXX_STANDARD 11
  CXX_STANDARD_REQUIRED YES
)

# Expose this source directory (CMake-style path plus a trailing "/") to the
# tests as the TEST_BINARY_INPUT_PATH compile definition.
# NOTE(review): presumably used to locate the *.sk fixture files - confirm.
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" HLL_TEST_BINARY_PATH)
string(APPEND HLL_TEST_BINARY_PATH "/")
target_compile_definitions(hll_test
  PRIVATE
    TEST_BINARY_INPUT_PATH="${HLL_TEST_BINARY_PATH}"
)

# Register the executable with CTest.
add_test(
  NAME hll_test
  COMMAND hll_test
)
@@ -0,0 +1,130 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include "hll.hpp"
21
+ #include "CouponHashSet.hpp"
22
+ #include "HllUtil.hpp"
23
+
24
+ #include <catch.hpp>
25
+ #include <ostream>
26
+ #include <cmath>
27
+ #include <string>
28
+ #include <exception>
29
+
30
+ namespace datasketches {
31
+
32
+ TEST_CASE("coupon hash set: check corrupt bytearray", "[coupon_hash_set]") {
33
+ int lgK = 8;
34
+ hll_sketch sk1(lgK);
35
+ for (int i = 0; i < 24; ++i) {
36
+ sk1.update(i);
37
+ }
38
+ auto sketchBytes = sk1.serialize_updatable();
39
+ uint8_t* bytes = sketchBytes.data();
40
+ const size_t size = sketchBytes.size();
41
+
42
+ bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = 0;
43
+ // fail in HllSketchImpl
44
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
45
+ // fail in CouponHashSet
46
+ REQUIRE_THROWS_AS(CouponHashSet<>::newSet(bytes, size), std::invalid_argument);
47
+ bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = HllUtil<>::HASH_SET_PREINTS;
48
+
49
+ bytes[HllUtil<>::SER_VER_BYTE] = 0;
50
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
51
+ bytes[HllUtil<>::SER_VER_BYTE] = HllUtil<>::SER_VER;
52
+
53
+ bytes[HllUtil<>::FAMILY_BYTE] = 0;
54
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
55
+ bytes[HllUtil<>::FAMILY_BYTE] = HllUtil<>::FAMILY_ID;
56
+
57
+ bytes[HllUtil<>::LG_K_BYTE] = 6;
58
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
59
+ bytes[HllUtil<>::LG_K_BYTE] = lgK;
60
+
61
+ uint8_t tmp = bytes[HllUtil<>::MODE_BYTE];
62
+ bytes[HllUtil<>::MODE_BYTE] = 0x10; // HLL_6, LIST
63
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
64
+ bytes[HllUtil<>::MODE_BYTE] = tmp;
65
+
66
+ tmp = bytes[HllUtil<>::LG_ARR_BYTE];
67
+ bytes[HllUtil<>::LG_ARR_BYTE] = 0;
68
+ hll_sketch::deserialize(bytes, size);
69
+ // should work fine despite the corruption
70
+ bytes[HllUtil<>::LG_ARR_BYTE] = tmp;
71
+
72
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size - 1), std::out_of_range);
73
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, 3), std::out_of_range);
74
+ }
75
+
76
+ TEST_CASE("coupon hash set: check corrupt stream", "[coupon_hash_set]") {
77
+ int lgK = 9;
78
+ hll_sketch sk1(lgK);
79
+ for (int i = 0; i < 24; ++i) {
80
+ sk1.update(i);
81
+ }
82
+ std::stringstream ss;
83
+ sk1.serialize_compact(ss);
84
+
85
+ ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
86
+ ss.put(0);
87
+ ss.seekg(0);
88
+ // fail in HllSketchImpl
89
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
90
+ // fail in CouponHashSet
91
+ REQUIRE_THROWS_AS(CouponHashSet<>::newSet(ss), std::invalid_argument);
92
+ ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
93
+ ss.put(HllUtil<>::HASH_SET_PREINTS);
94
+
95
+ ss.seekp(HllUtil<>::SER_VER_BYTE);
96
+ ss.put(0);
97
+ ss.seekg(0);
98
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
99
+ ss.seekp(HllUtil<>::SER_VER_BYTE);
100
+ ss.put(HllUtil<>::SER_VER);
101
+
102
+ ss.seekp(HllUtil<>::FAMILY_BYTE);
103
+ ss.put(0);
104
+ ss.seekg(0);
105
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
106
+ ss.seekp(HllUtil<>::FAMILY_BYTE);
107
+ ss.put(HllUtil<>::FAMILY_ID);
108
+
109
+ ss.seekg(HllUtil<>::MODE_BYTE);
110
+ uint8_t tmp = ss.get();
111
+ ss.seekp(HllUtil<>::MODE_BYTE);
112
+ ss.put(0x22); // HLL_8, HLL
113
+ ss.seekg(0);
114
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
115
+ ss.seekp(HllUtil<>::MODE_BYTE);
116
+ ss.put(tmp);
117
+
118
+ ss.seekg(HllUtil<>::LG_ARR_BYTE);
119
+ tmp = ss.get();
120
+ ss.seekp(HllUtil<>::LG_ARR_BYTE);
121
+ ss.put(0);
122
+ ss.seekg(0);
123
+ hll_sketch::deserialize(ss);
124
+ // should work fine despite the corruption
125
+ ss.seekp(HllUtil<>::LG_ARR_BYTE);
126
+ ss.put(tmp);
127
+ }
128
+
129
+
130
+ } // namespace datasketches
@@ -0,0 +1,181 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+ #include <ostream>
22
+ #include <sstream>
23
+ #include <cmath>
24
+ #include <string>
25
+ #include <exception>
26
+
27
+ #include "hll.hpp"
28
+ #include "CouponList.hpp"
29
+ #include "HllUtil.hpp"
30
+
31
+ namespace datasketches {
32
+
33
// Debug print hook for these tests. Printing is currently switched off, so
// the function deliberately does nothing with its argument; re-enable the
// commented-out line below to see the output.
void println_string(std::string str) {
  static_cast<void>(str); // intentionally unused while printing is disabled
  //std::cout << str << std::endl;
}
36
+
37
+ TEST_CASE("coupon list: check iterator", "[coupon_list]") {
38
+ int lgConfigK = 8;
39
+ CouponList<> cl(lgConfigK, HLL_4, LIST);
40
+ for (int i = 1; i <= 7; ++i) { cl.couponUpdate(HllUtil<>::pair(i, i)); } // not hashes but distinct values
41
+ const int mask = (1 << lgConfigK) - 1;
42
+ int idx = 0;
43
+ auto itr = cl.begin(false);
44
+ while (itr != cl.end()) {
45
+ int key = HllUtil<>::getLow26(*itr);
46
+ int val = HllUtil<>::getValue(*itr);
47
+ int slot = HllUtil<>::getLow26(*itr) & mask;
48
+ std::ostringstream oss;
49
+ oss << "Idx: " << idx << ", Key: " << key << ", Val: " << val
50
+ << ", Slot: " << slot;
51
+ println_string(oss.str());
52
+ REQUIRE(val == idx + 1);
53
+ ++itr;
54
+ ++idx;
55
+ }
56
+ }
57
+
58
+ TEST_CASE("coupon list: check duplicates and misc", "[coupon_list]") {
59
+ int lgConfigK = 8;
60
+ hll_sketch sk(lgConfigK);
61
+
62
+ for (int i = 1; i <= 7; ++i) {
63
+ sk.update(i);
64
+ sk.update(i);
65
+ }
66
+ REQUIRE(sk.get_composite_estimate() == Approx(7.0).epsilon(0.1));
67
+
68
+ sk.update(8);
69
+ sk.update(8);
70
+ REQUIRE(sk.get_composite_estimate() == Approx(8.0).epsilon(0.1));
71
+
72
+ for (int i = 9; i <= 25; ++i) {
73
+ sk.update(i);
74
+ sk.update(i);
75
+ }
76
+ REQUIRE(sk.get_composite_estimate() == Approx(25.0).epsilon(0.1));
77
+
78
+ double relErr = sk.get_rel_err(true, true, 4, 1);
79
+ REQUIRE(relErr < 0.0);
80
+ }
81
+
82
+ static void serializeDeserialize(const int lgK) {
83
+ hll_sketch sk1(lgK);
84
+
85
+ int u = (lgK < 8) ? 7 : (((1 << (lgK - 3))/ 4) * 3);
86
+ for (int i = 0; i < u; ++i) {
87
+ sk1.update(i);
88
+ }
89
+ double est1 = sk1.get_estimate();
90
+ REQUIRE(est1 == Approx(u).margin(u * 1e-4));
91
+
92
+ std::stringstream ss(std::ios::in | std::ios::out | std::ios::binary);
93
+ sk1.serialize_compact(ss);
94
+ hll_sketch sk2 = hll_sketch::deserialize(ss);
95
+ double est2 = sk2.get_estimate();
96
+ REQUIRE(est1 == est2);
97
+
98
+ ss.str(std::string());
99
+ ss.clear();
100
+
101
+ sk1.serialize_updatable(ss);
102
+ sk2 = hll_sketch::deserialize(ss);
103
+ est2 = sk2.get_estimate();
104
+ REQUIRE(est1 == est2);
105
+ }
106
+
107
+ TEST_CASE("coupon list: check serialize deserialize", "[coupon_list]") {
108
+ serializeDeserialize(7);
109
+ serializeDeserialize(21);
110
+ }
111
+
112
+ TEST_CASE("coupon list: check corrupt bytearray data", "[coupon_list]") {
113
+ int lgK = 6;
114
+ hll_sketch sk1(lgK);
115
+ sk1.update(1);
116
+ sk1.update(2);
117
+ auto sketchBytes = sk1.serialize_compact();
118
+ uint8_t* bytes = sketchBytes.data();
119
+ const size_t size = sketchBytes.size();
120
+
121
+ bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = 0;
122
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
123
+ REQUIRE_THROWS_AS(CouponList<>::newList(bytes, size), std::invalid_argument);
124
+
125
+ bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = HllUtil<>::LIST_PREINTS;
126
+
127
+ bytes[HllUtil<>::SER_VER_BYTE] = 0;
128
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
129
+ bytes[HllUtil<>::SER_VER_BYTE] = HllUtil<>::SER_VER;
130
+
131
+ bytes[HllUtil<>::FAMILY_BYTE] = 0;
132
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
133
+ bytes[HllUtil<>::FAMILY_BYTE] = HllUtil<>::FAMILY_ID;
134
+
135
+ uint8_t tmp = bytes[HllUtil<>::MODE_BYTE];
136
+ bytes[HllUtil<>::MODE_BYTE] = 0x01; // HLL_4, SET
137
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
138
+ bytes[HllUtil<>::MODE_BYTE] = tmp;
139
+
140
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size - 1), std::out_of_range);
141
+
142
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, 3), std::out_of_range);
143
+ }
144
+
145
+ TEST_CASE("coupon list: check corrupt stream data", "[coupon_list]") {
146
+ int lgK = 6;
147
+ hll_sketch sk1(lgK);
148
+ sk1.update(1);
149
+ sk1.update(2);
150
+ std::stringstream ss;
151
+ sk1.serialize_compact(ss);
152
+
153
+ ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
154
+ ss.put(0);
155
+ ss.seekg(0);
156
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
157
+ REQUIRE_THROWS_AS(CouponList<>::newList(ss), std::invalid_argument);
158
+ ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
159
+ ss.put(HllUtil<>::LIST_PREINTS);
160
+
161
+ ss.seekp(HllUtil<>::SER_VER_BYTE);
162
+ ss.put(0);
163
+ ss.seekg(0);
164
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
165
+ ss.seekp(HllUtil<>::SER_VER_BYTE);
166
+ ss.put(HllUtil<>::SER_VER);
167
+
168
+ ss.seekp(HllUtil<>::FAMILY_BYTE);
169
+ ss.put(0);
170
+ ss.seekg(0);
171
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
172
+ ss.seekp(HllUtil<>::FAMILY_BYTE);
173
+ ss.put(HllUtil<>::FAMILY_ID);
174
+
175
+ ss.seekp(HllUtil<>::MODE_BYTE);
176
+ ss.put(0x22); // HLL_8, HLL
177
+ ss.seekg(0);
178
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
179
+ }
180
+
181
+ } /* namespace datasketches */