datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,93 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+ #include <ostream>
22
+ #include <cmath>
23
+ #include <string>
24
+
25
+ #include "hll.hpp"
26
+
27
+ namespace datasketches {
28
+
29
+ static hll_sketch buildSketch(const int n, const int lgK, const target_hll_type tgtHllType) {
30
+ hll_sketch sketch(lgK, tgtHllType);
31
+ for (int i = 0; i < n; ++i) {
32
+ sketch.update(i);
33
+ }
34
+ return sketch;
35
+ }
36
+
37
+ static void crossCountingCheck(const int lgK, const int n) {
38
+ hll_sketch sk4 = buildSketch(n, lgK, HLL_4);
39
+ const double est = sk4.get_estimate();
40
+ const double lb = sk4.get_lower_bound(1);
41
+ const double ub = sk4.get_upper_bound(1);
42
+
43
+ hll_sketch sk6 = buildSketch(n, lgK, HLL_6);
44
+ REQUIRE(sk6.get_estimate() == est);
45
+ REQUIRE(sk6.get_lower_bound(1) == lb);
46
+ REQUIRE(sk6.get_upper_bound(1) == ub);
47
+
48
+ hll_sketch sk8 = buildSketch(n, lgK, HLL_8);
49
+ REQUIRE(sk8.get_estimate() == est);
50
+ REQUIRE(sk8.get_lower_bound(1) == lb);
51
+ REQUIRE(sk8.get_upper_bound(1) == ub);
52
+
53
+ // Conversions
54
+ hll_sketch sk4to6(sk4, HLL_6);
55
+ REQUIRE(sk4to6.get_estimate() == est);
56
+ REQUIRE(sk4to6.get_lower_bound(1) == lb);
57
+ REQUIRE(sk4to6.get_upper_bound(1) == ub);
58
+
59
+ hll_sketch sk4to8(sk4, HLL_8);
60
+ REQUIRE(sk4to8.get_estimate() == est);
61
+ REQUIRE(sk4to8.get_lower_bound(1) == lb);
62
+ REQUIRE(sk4to8.get_upper_bound(1) == ub);
63
+
64
+ hll_sketch sk6to4(sk6, HLL_4);
65
+ REQUIRE(sk6to4.get_estimate() == est);
66
+ REQUIRE(sk6to4.get_lower_bound(1) == lb);
67
+ REQUIRE(sk6to4.get_upper_bound(1) == ub);
68
+
69
+ hll_sketch sk6to8(sk6, HLL_8);
70
+ REQUIRE(sk6to8.get_estimate() == est);
71
+ REQUIRE(sk6to8.get_lower_bound(1) == lb);
72
+ REQUIRE(sk6to8.get_upper_bound(1) == ub);
73
+
74
+ hll_sketch sk8to4(sk8, HLL_4);
75
+ REQUIRE(sk8to4.get_estimate() == est);
76
+ REQUIRE(sk8to4.get_lower_bound(1) == lb);
77
+ REQUIRE(sk8to4.get_upper_bound(1) == ub);
78
+
79
+ hll_sketch sk8to6(sk8, HLL_6);
80
+ REQUIRE(sk8to6.get_estimate() == est);
81
+ REQUIRE(sk8to6.get_lower_bound(1) == lb);
82
+ REQUIRE(sk8to6.get_upper_bound(1) == ub);
83
+ }
84
+
85
+ TEST_CASE("cross counting: cross counting checks", "[cross_counting]") {
86
+ crossCountingCheck(4, 100);
87
+ crossCountingCheck(4, 10000);
88
+ crossCountingCheck(12, 7);
89
+ crossCountingCheck(12, 384);
90
+ crossCountingCheck(12, 10000);
91
+ }
92
+
93
+ } /* namespace datasketches */
@@ -0,0 +1,191 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include "hll.hpp"
21
+
22
+ #include <exception>
23
+ #include <sstream>
24
+ #include <catch.hpp>
25
+
26
+ namespace datasketches {
27
+
28
+ static void testComposite(const int lgK, const target_hll_type tgtHllType, const int n) {
29
+ hll_union u(lgK);
30
+ hll_sketch sk(lgK, tgtHllType);
31
+ for (int i = 0; i < n; ++i) {
32
+ u.update(i);
33
+ sk.update(i);
34
+ }
35
+ u.update(sk); // merge
36
+ hll_sketch res = u.get_result(target_hll_type::HLL_8);
37
+ double est = res.get_composite_estimate();
38
+ REQUIRE(sk.get_composite_estimate() == est);
39
+ }
40
+
41
+ TEST_CASE("hll array: check composite estimate", "[hll_array]") {
42
+ testComposite(4, target_hll_type::HLL_8, 10000);
43
+ testComposite(5, target_hll_type::HLL_8, 10000);
44
+ testComposite(6, target_hll_type::HLL_8, 10000);
45
+ testComposite(13, target_hll_type::HLL_8, 10000);
46
+ }
47
+
48
+ static void serializeDeserialize(const int lgK, target_hll_type tgtHllType, const int n) {
49
+ hll_sketch sk1(lgK, tgtHllType);
50
+
51
+ for (int i = 0; i < n; ++i) {
52
+ sk1.update(i);
53
+ }
54
+ //REQUIRE(sk1.getCurrentMode() == CurMode::HLL);
55
+
56
+ double est1 = sk1.get_estimate();
57
+ REQUIRE(est1 == Approx(n).margin(n * 0.03));
58
+
59
+ // serialize as compact and updatable, deserialize, compare estimates are exact
60
+ std::stringstream ss(std::ios::in | std::ios::out | std::ios::binary);
61
+ sk1.serialize_compact(ss);
62
+ hll_sketch sk2 = hll_sketch::deserialize(ss);
63
+ REQUIRE(sk1.get_estimate() == sk2.get_estimate());
64
+
65
+ ss.clear();
66
+ sk1.serialize_updatable(ss);
67
+ sk2 = hll_sketch::deserialize(ss);
68
+ REQUIRE(sk1.get_estimate() == sk2.get_estimate());
69
+
70
+ sk1.reset();
71
+ REQUIRE(sk1.get_estimate() == 0.0);
72
+ }
73
+
74
+ TEST_CASE("hll array: check serialize deserialize", "[hll_array]") {
75
+ int lgK = 4;
76
+ int n = 8;
77
+ serializeDeserialize(lgK, HLL_4, n);
78
+ serializeDeserialize(lgK, HLL_6, n);
79
+ serializeDeserialize(lgK, HLL_8, n);
80
+
81
+ lgK = 15;
82
+ n = (((1 << (lgK - 3))*3)/4) + 100;
83
+ serializeDeserialize(lgK, HLL_4, n);
84
+ serializeDeserialize(lgK, HLL_6, n);
85
+ serializeDeserialize(lgK, HLL_8, n);
86
+
87
+ lgK = 21;
88
+ n = (((1 << (lgK - 3))*3)/4) + 1000;
89
+ serializeDeserialize(lgK, HLL_4, n);
90
+ serializeDeserialize(lgK, HLL_6, n);
91
+ serializeDeserialize(lgK, HLL_8, n);
92
+ }
93
+
94
+ TEST_CASE("hll array: check is compact", "[hll_array]") {
95
+ hll_sketch sk(4);
96
+ for (int i = 0; i < 8; ++i) {
97
+ sk.update(i);
98
+ }
99
+ REQUIRE_FALSE(sk.is_compact());
100
+ }
101
+
102
+ TEST_CASE("hll array: check corrupt bytearray", "[hll_array]") {
103
+ int lgK = 8;
104
+ hll_sketch sk1(lgK, HLL_8);
105
+ for (int i = 0; i < 50; ++i) {
106
+ sk1.update(i);
107
+ }
108
+ auto sketchBytes = sk1.serialize_compact();
109
+ uint8_t* bytes = sketchBytes.data();
110
+ const size_t size = sketchBytes.size();
111
+
112
+ bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = 0;
113
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
114
+ REQUIRE_THROWS_AS(HllArray<>::newHll(bytes, size), std::invalid_argument);
115
+ bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = HllUtil<>::HLL_PREINTS;
116
+
117
+ bytes[HllUtil<>::SER_VER_BYTE] = 0;
118
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
119
+ bytes[HllUtil<>::SER_VER_BYTE] = HllUtil<>::SER_VER;
120
+
121
+ bytes[HllUtil<>::FAMILY_BYTE] = 0;
122
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
123
+ bytes[HllUtil<>::FAMILY_BYTE] = HllUtil<>::FAMILY_ID;
124
+
125
+ uint8_t tmp = bytes[HllUtil<>::MODE_BYTE];
126
+ bytes[HllUtil<>::MODE_BYTE] = 0x10; // HLL_6, LIST
127
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
128
+ bytes[HllUtil<>::MODE_BYTE] = tmp;
129
+
130
+ tmp = bytes[HllUtil<>::LG_ARR_BYTE];
131
+ bytes[HllUtil<>::LG_ARR_BYTE] = 0;
132
+ hll_sketch::deserialize(bytes, size);
133
+ // should work fine despite the corruption
134
+ bytes[HllUtil<>::LG_ARR_BYTE] = tmp;
135
+
136
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size - 1), std::out_of_range);
137
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, 3), std::out_of_range);
138
+ }
139
+
140
+ TEST_CASE("hll array: check corrupt stream", "[hll_array]") {
141
+ int lgK = 6;
142
+ hll_sketch sk1(lgK);
143
+ for (int i = 0; i < 50; ++i) {
144
+ sk1.update(i);
145
+ }
146
+ std::stringstream ss;
147
+ sk1.serialize_compact(ss);
148
+
149
+ ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
150
+ ss.put(0);
151
+ ss.seekg(0);
152
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
153
+ REQUIRE_THROWS_AS(HllArray<>::newHll(ss), std::invalid_argument);
154
+ ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
155
+ ss.put(HllUtil<>::HLL_PREINTS);
156
+
157
+ ss.seekp(HllUtil<>::SER_VER_BYTE);
158
+ ss.put(0);
159
+ ss.seekg(0);
160
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
161
+ ss.seekp(HllUtil<>::SER_VER_BYTE);
162
+ ss.put(HllUtil<>::SER_VER);
163
+
164
+ ss.seekp(HllUtil<>::FAMILY_BYTE);
165
+ ss.put(0);
166
+ ss.seekg(0);
167
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
168
+ ss.seekp(HllUtil<>::FAMILY_BYTE);
169
+ ss.put(HllUtil<>::FAMILY_ID);
170
+
171
+ ss.seekg(HllUtil<>::MODE_BYTE);
172
+ uint8_t tmp = ss.get();
173
+ ss.seekp(HllUtil<>::MODE_BYTE);
174
+ ss.put(0x11); // HLL_6, SET
175
+ ss.seekg(0);
176
+ REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
177
+ ss.seekp(HllUtil<>::MODE_BYTE);
178
+ ss.put(tmp);
179
+
180
+ ss.seekg(HllUtil<>::LG_ARR_BYTE);
181
+ tmp = ss.get();
182
+ ss.seekp(HllUtil<>::LG_ARR_BYTE);
183
+ ss.put(0);
184
+ ss.seekg(0);
185
+ hll_sketch::deserialize(ss);
186
+ // should work fine despite the corruption
187
+ ss.seekp(HllUtil<>::LG_ARR_BYTE);
188
+ ss.put(tmp);
189
+ }
190
+
191
+ } /* namespace datasketches */
@@ -0,0 +1,389 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include "hll.hpp"
21
+
22
+ #include <catch.hpp>
23
+ #include <test_allocator.hpp>
24
+
25
+ namespace datasketches {
26
+
27
+ typedef hll_sketch_alloc<test_allocator<void>> hll_sketch_test_alloc;
28
+
29
+ static void runCheckCopy(int lgConfigK, target_hll_type tgtHllType) {
30
+ hll_sketch_test_alloc sk(lgConfigK, tgtHllType);
31
+
32
+ for (int i = 0; i < 7; ++i) {
33
+ sk.update(i);
34
+ }
35
+
36
+ hll_sketch_test_alloc skCopy = sk;
37
+ REQUIRE(sk.get_estimate() == skCopy.get_estimate());
38
+
39
+ // no access to hllSketchImpl, so we'll ensure those differ by adding more
40
+ // data to sk and ensuring the mode and estimates differ
41
+ for (int i = 7; i < 24; ++i) {
42
+ sk.update(i);
43
+ }
44
+ REQUIRE(16.0 < (sk.get_estimate() - skCopy.get_estimate()));
45
+
46
+ skCopy = sk;
47
+ REQUIRE(sk.get_estimate() == skCopy.get_estimate());
48
+
49
+ int u = (sk.get_target_type() == HLL_4) ? 100000 : 25;
50
+ for (int i = 24; i < u; ++i) {
51
+ sk.update(i);
52
+ }
53
+ REQUIRE(sk.get_estimate() != skCopy.get_estimate()); // either 1 or 100k difference
54
+
55
+ skCopy = sk;
56
+ REQUIRE(sk.get_estimate() == skCopy.get_estimate());
57
+ }
58
+
59
+ TEST_CASE("hll sketch: check copies", "[hll_sketch]") {
60
+ test_allocator_total_bytes = 0;
61
+ runCheckCopy(14, HLL_4);
62
+ runCheckCopy(8, HLL_6);
63
+ runCheckCopy(8, HLL_8);
64
+ REQUIRE(test_allocator_total_bytes == 0);
65
+ }
66
+
67
+ static void copyAs(target_hll_type srcType, target_hll_type dstType) {
68
+ int lgK = 8;
69
+ int n1 = 7;
70
+ int n2 = 24;
71
+ int n3 = 1000;
72
+ int base = 0;
73
+
74
+ hll_sketch_test_alloc src(lgK, srcType);
75
+ for (int i = 0; i < n1; ++i) {
76
+ src.update(i + base);
77
+ }
78
+ hll_sketch_test_alloc dst(src, dstType);
79
+ REQUIRE(src.get_estimate() == dst.get_estimate());
80
+
81
+ for (int i = n1; i < n2; ++i) {
82
+ src.update(i + base);
83
+ }
84
+ dst = hll_sketch_test_alloc(src, dstType);
85
+ REQUIRE(src.get_estimate() == dst.get_estimate());
86
+
87
+ for (int i = n2; i < n3; ++i) {
88
+ src.update(i + base);
89
+ }
90
+ dst = hll_sketch_test_alloc(src, dstType);
91
+ REQUIRE(src.get_estimate() == dst.get_estimate());
92
+ }
93
+
94
+ TEST_CASE("hll sketch: check copy as", "[hll_sketch]") {
95
+ test_allocator_total_bytes = 0;
96
+ copyAs(HLL_4, HLL_4);
97
+ copyAs(HLL_4, HLL_6);
98
+ copyAs(HLL_4, HLL_8);
99
+ copyAs(HLL_6, HLL_4);
100
+ copyAs(HLL_6, HLL_6);
101
+ copyAs(HLL_6, HLL_8);
102
+ copyAs(HLL_8, HLL_4);
103
+ copyAs(HLL_8, HLL_6);
104
+ copyAs(HLL_8, HLL_8);
105
+ REQUIRE(test_allocator_total_bytes == 0);
106
+ }
107
+
108
+ TEST_CASE("hll sketch: check misc1", "[hll_sketch]") {
109
+ test_allocator_total_bytes = 0;
110
+ {
111
+ int lgConfigK = 8;
112
+ target_hll_type srcType = target_hll_type::HLL_8;
113
+ hll_sketch_test_alloc sk(lgConfigK, srcType);
114
+
115
+ for (int i = 0; i < 7; ++i) { sk.update(i); } // LIST
116
+ REQUIRE(sk.get_compact_serialization_bytes() == 36);
117
+ REQUIRE(sk.get_updatable_serialization_bytes() == 40);
118
+
119
+ for (int i = 7; i < 24; ++i) { sk.update(i); } // SET
120
+ REQUIRE(sk.get_compact_serialization_bytes() == 108);
121
+ REQUIRE(sk.get_updatable_serialization_bytes() == 140);
122
+
123
+ sk.update(24); // HLL
124
+ REQUIRE(sk.get_updatable_serialization_bytes() == 40 + 256);
125
+
126
+ const int hllBytes = HllUtil<>::HLL_BYTE_ARR_START + (1 << lgConfigK);
127
+ REQUIRE(sk.get_compact_serialization_bytes() == hllBytes);
128
+ REQUIRE(hll_sketch::get_max_updatable_serialization_bytes(lgConfigK, HLL_8) == hllBytes);
129
+ }
130
+ REQUIRE(test_allocator_total_bytes == 0);
131
+ }
132
+
133
+ TEST_CASE("hll sketch: check num std dev", "[hll_sketch]") {
134
+ REQUIRE_THROWS_AS(HllUtil<>::checkNumStdDev(0), std::invalid_argument);
135
+ }
136
+
137
+ void checkSerializationSizes(const int lgConfigK, target_hll_type tgtHllType) {
138
+ hll_sketch_test_alloc sk(lgConfigK, tgtHllType);
139
+ int i;
140
+
141
+ // LIST
142
+ for (i = 0; i < 7; ++i) { sk.update(i); }
143
+ int expected = HllUtil<>::LIST_INT_ARR_START + (i << 2);
144
+ REQUIRE(sk.get_compact_serialization_bytes() == expected);
145
+ expected = HllUtil<>::LIST_INT_ARR_START + (4 << HllUtil<>::LG_INIT_LIST_SIZE);
146
+ REQUIRE(sk.get_updatable_serialization_bytes() == expected);
147
+
148
+ // SET
149
+ for (i = 7; i < 24; ++i) { sk.update(i); }
150
+ expected = HllUtil<>::HASH_SET_INT_ARR_START + (i << 2);
151
+ REQUIRE(sk.get_compact_serialization_bytes() == expected);
152
+ expected = HllUtil<>::HASH_SET_INT_ARR_START + (4 << HllUtil<>::LG_INIT_SET_SIZE);
153
+ REQUIRE(sk.get_updatable_serialization_bytes() == expected);
154
+ }
155
+
156
+ TEST_CASE("hll sketch: check ser sizes", "[hll_sketch]") {
157
+ test_allocator_total_bytes = 0;
158
+ checkSerializationSizes(8, HLL_8);
159
+ checkSerializationSizes(8, HLL_6);
160
+ checkSerializationSizes(8, HLL_4);
161
+ REQUIRE(test_allocator_total_bytes == 0);
162
+ }
163
+
164
+ TEST_CASE("hll sketch: exercise to string", "[hll_sketch]") {
165
+ test_allocator_total_bytes = 0;
166
+ {
167
+ hll_sketch_test_alloc sk(15, HLL_4);
168
+ for (int i = 0; i < 25; ++i) { sk.update(i); }
169
+ std::ostringstream oss(std::ios::binary);
170
+ oss << sk.to_string(false, true, true, true);
171
+ for (int i = 25; i < (1 << 20); ++i) { sk.update(i); }
172
+ oss << sk.to_string(false, true, true, true);
173
+ oss << sk.to_string(false, true, true, false);
174
+
175
+ sk = hll_sketch_test_alloc(8, HLL_8);
176
+ for (int i = 0; i < 25; ++i) { sk.update(i); }
177
+ oss << sk.to_string(false, true, true, true);
178
+ }
179
+ REQUIRE(test_allocator_total_bytes == 0);
180
+ }
181
+
182
+ // Creates and serializes then deserializes sketch.
183
+ // Returns true if deserialized sketch is compact.
184
+ static bool checkCompact(const int lgK, const int n, const target_hll_type type, bool compact) {
185
+ hll_sketch_test_alloc sk(lgK, type);
186
+ for (int i = 0; i < n; ++i) { sk.update(i); }
187
+
188
+ std::stringstream ss(std::ios::in | std::ios::out | std::ios::binary);
189
+ if (compact) {
190
+ sk.serialize_compact(ss);
191
+ REQUIRE(ss.tellp() == sk.get_compact_serialization_bytes());
192
+ } else {
193
+ sk.serialize_updatable(ss);
194
+ REQUIRE(ss.tellp() == sk.get_updatable_serialization_bytes());
195
+ }
196
+
197
+ hll_sketch_test_alloc sk2 = hll_sketch_test_alloc::deserialize(ss);
198
+ REQUIRE(sk2.get_estimate() == Approx(n).margin(0.01));
199
+ bool isCompact = sk2.is_compact();
200
+
201
+ return isCompact;
202
+ }
203
+
204
+ TEST_CASE("hll sketch: check compact flag", "[hll_sketch]") {
205
+ test_allocator_total_bytes = 0;
206
+ {
207
+ int lgK = 8;
208
+ // unless/until we create non-updatable "direct" versions,
209
+ // deserialized image should never be compact
210
+ // LIST: follows serialization request
211
+ REQUIRE(checkCompact(lgK, 7, HLL_8, false) == false);
212
+ REQUIRE(checkCompact(lgK, 7, HLL_8, true) == false);
213
+
214
+ // SET: follows serialization request
215
+ REQUIRE(checkCompact(lgK, 24, HLL_8, false) == false);
216
+ REQUIRE(checkCompact(lgK, 24, HLL_8, true) == false);
217
+
218
+ // HLL8: always updatable
219
+ REQUIRE(checkCompact(lgK, 25, HLL_8, false) == false);
220
+ REQUIRE(checkCompact(lgK, 25, HLL_8, true) == false);
221
+
222
+ // HLL6: always updatable
223
+ REQUIRE(checkCompact(lgK, 25, HLL_6, false) == false);
224
+ REQUIRE(checkCompact(lgK, 25, HLL_6, true) == false);
225
+
226
+ // HLL4: follows serialization request
227
+ REQUIRE(checkCompact(lgK, 25, HLL_4, false) == false);
228
+ REQUIRE(checkCompact(lgK, 25, HLL_4, true) == false);
229
+ }
230
+ REQUIRE(test_allocator_total_bytes == 0);
231
+ }
232
+
233
+ TEST_CASE("hll sketch: check k limits", "[hll_sketch]") {
234
+ test_allocator_total_bytes = 0;
235
+ {
236
+ hll_sketch_test_alloc sketch1(HllUtil<>::MIN_LOG_K, target_hll_type::HLL_8);
237
+ hll_sketch_test_alloc sketch2(HllUtil<>::MAX_LOG_K, target_hll_type::HLL_4);
238
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc(HllUtil<>::MIN_LOG_K - 1), std::invalid_argument);
239
+
240
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc(HllUtil<>::MAX_LOG_K + 1), std::invalid_argument);
241
+ }
242
+ REQUIRE(test_allocator_total_bytes == 0);
243
+ }
244
+
245
+ TEST_CASE("hll sketch: check input types", "[hll_sketch]") {
246
+ test_allocator_total_bytes = 0;
247
+ {
248
+ hll_sketch_test_alloc sk(8, target_hll_type::HLL_8);
249
+
250
+ // inserting the same value as a variety of input types
251
+ sk.update((uint8_t) 102);
252
+ sk.update((uint16_t) 102);
253
+ sk.update((uint32_t) 102);
254
+ sk.update((uint64_t) 102);
255
+ sk.update((int8_t) 102);
256
+ sk.update((int16_t) 102);
257
+ sk.update((int32_t) 102);
258
+ sk.update((int64_t) 102);
259
+ REQUIRE(sk.get_estimate() == Approx(1.0).margin(0.01));
260
+
261
+ // identical binary representations
262
+ // no unsigned in Java, but need to sign-extend both as Java would do
263
+ sk.update((uint8_t) 255);
264
+ sk.update((int8_t) -1);
265
+
266
+ sk.update((float) -2.0);
267
+ sk.update((double) -2.0);
268
+
269
+ std::string str = "input string";
270
+ sk.update(str);
271
+ sk.update(str.c_str(), str.length());
272
+ REQUIRE(sk.get_estimate() == Approx(4.0).margin(0.01));
273
+
274
+ sk = hll_sketch_test_alloc(8, target_hll_type::HLL_6);
275
+ sk.update((float) 0.0);
276
+ sk.update((float) -0.0);
277
+ sk.update((double) 0.0);
278
+ sk.update((double) -0.0);
279
+ REQUIRE(sk.get_estimate() == Approx(1.0).margin(0.01));
280
+
281
+ sk = hll_sketch_test_alloc(8, target_hll_type::HLL_4);
282
+ sk.update(std::nanf("3"));
283
+ sk.update(std::nan("9"));
284
+ REQUIRE(sk.get_estimate() == Approx(1.0).margin(0.01));
285
+
286
+ sk = hll_sketch_test_alloc(8, target_hll_type::HLL_4);
287
+ sk.update(nullptr, 0);
288
+ sk.update("");
289
+ REQUIRE(sk.is_empty());
290
+ }
291
+ REQUIRE(test_allocator_total_bytes == 0);
292
+ }
293
+
294
+ TEST_CASE("hll sketch: deserialize list mode buffer overrun", "[hll_sketch]") {
295
+ test_allocator_total_bytes = 0;
296
+ {
297
+ hll_sketch_test_alloc sketch(10);
298
+ sketch.update(1);
299
+ auto bytes = sketch.serialize_compact();
300
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 7), std::out_of_range);
301
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
302
+
303
+ // ckeck for leaks on stream exceptions
304
+ {
305
+ std::stringstream ss;
306
+ ss.exceptions(std::ios::failbit | std::ios::badbit);
307
+ ss.str(std::string((char*)bytes.data(), 7));
308
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
309
+ }
310
+ {
311
+ std::stringstream ss;
312
+ ss.exceptions(std::ios::failbit | std::ios::badbit);
313
+ ss.str(std::string((char*)bytes.data(), bytes.size() - 1));
314
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
315
+ }
316
+ }
317
+ REQUIRE(test_allocator_total_bytes == 0);
318
+ }
319
+
320
+ TEST_CASE("hll sketch: deserialize set mode buffer overrun", "[hll_sketch]") {
321
+ test_allocator_total_bytes = 0;
322
+ {
323
+ hll_sketch_test_alloc sketch(10);
324
+ for (int i = 0; i < 10; ++i) sketch.update(i);
325
+ //std::cout << sketch.to_string();
326
+ auto bytes = sketch.serialize_updatable();
327
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 7), std::out_of_range);
328
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
329
+
330
+ // ckeck for leaks on stream exceptions
331
+ {
332
+ std::stringstream ss;
333
+ ss.exceptions(std::ios::failbit | std::ios::badbit);
334
+ ss.str(std::string((char*)bytes.data(), 7));
335
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
336
+ }
337
+ {
338
+ std::stringstream ss;
339
+ ss.exceptions(std::ios::failbit | std::ios::badbit);
340
+ ss.str(std::string((char*)bytes.data(), bytes.size() - 1));
341
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
342
+ }
343
+ }
344
+ REQUIRE(test_allocator_total_bytes == 0);
345
+ }
346
+
347
+ TEST_CASE("hll sketch: deserialize HLL mode buffer overrun", "[hll_sketch]") {
348
+ test_allocator_total_bytes = 0;
349
+ {
350
+ // this sketch should have aux table
351
+ hll_sketch_test_alloc sketch(15);
352
+ for (int i = 0; i < 14444; ++i) sketch.update(i);
353
+ //std::cout << sketch.to_string();
354
+ auto bytes = sketch.serialize_compact();
355
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 7), std::out_of_range);
356
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 15), std::out_of_range);
357
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 16420), std::out_of_range); // before aux table
358
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
359
+
360
+ // ckeck for leaks on stream exceptions
361
+ {
362
+ std::stringstream ss;
363
+ ss.exceptions(std::ios::failbit | std::ios::badbit);
364
+ ss.str(std::string((char*)bytes.data(), 7));
365
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
366
+ }
367
+ {
368
+ std::stringstream ss;
369
+ ss.exceptions(std::ios::failbit | std::ios::badbit);
370
+ ss.str(std::string((char*)bytes.data(), 15));
371
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
372
+ }
373
+ {
374
+ std::stringstream ss;
375
+ ss.exceptions(std::ios::failbit | std::ios::badbit);
376
+ ss.str(std::string((char*)bytes.data(), 16420)); // before aux table
377
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
378
+ }
379
+ {
380
+ std::stringstream ss;
381
+ ss.exceptions(std::ios::failbit | std::ios::badbit);
382
+ ss.str(std::string((char*)bytes.data(), bytes.size() - 1));
383
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
384
+ }
385
+ }
386
+ REQUIRE(test_allocator_total_bytes == 0);
387
+ }
388
+
389
+ } /* namespace datasketches */