datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,484 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef FREQUENT_ITEMS_SKETCH_IMPL_HPP_
21
+ #define FREQUENT_ITEMS_SKETCH_IMPL_HPP_
22
+
23
+ #include <cstring>
24
+ #include <limits>
25
+ #include <sstream>
26
+
27
+ #include "memory_operations.hpp"
28
+
29
+ namespace datasketches {
30
+
31
+ // clang++ seems to require this out-of-class definition of the static constant for CMAKE_BUILD_TYPE='Debug' (presumably because it is passed by reference to std::max in the constructor)
32
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
33
+ const uint8_t frequent_items_sketch<T, W, H, E, S, A>::LG_MIN_MAP_SIZE;
34
+
35
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
36
+ frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size):
37
+ total_weight(0),
38
+ offset(0),
39
+ map(std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE), std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE))
40
+ {
41
+ if (lg_start_map_size > lg_max_map_size) throw std::invalid_argument("starting size must not be greater than maximum size");
42
+ }
43
+
44
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
45
+ void frequent_items_sketch<T, W, H, E, S, A>::update(const T& item, W weight) {
46
+ check_weight(weight);
47
+ if (weight == 0) return;
48
+ total_weight += weight;
49
+ offset += map.adjust_or_insert(item, weight);
50
+ }
51
+
52
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
53
+ void frequent_items_sketch<T, W, H, E, S, A>::update(T&& item, W weight) {
54
+ check_weight(weight);
55
+ if (weight == 0) return;
56
+ total_weight += weight;
57
+ offset += map.adjust_or_insert(std::move(item), weight);
58
+ }
59
+
60
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
61
+ void frequent_items_sketch<T, W, H, E, S, A>::merge(const frequent_items_sketch& other) {
62
+ if (other.is_empty()) return;
63
+ const W merged_total_weight = total_weight + other.get_total_weight(); // for correction at the end
64
+ for (auto &it: other.map) {
65
+ update(it.first, it.second);
66
+ }
67
+ offset += other.offset;
68
+ total_weight = merged_total_weight;
69
+ }
70
+
71
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
72
+ void frequent_items_sketch<T, W, H, E, S, A>::merge(frequent_items_sketch&& other) {
73
+ if (other.is_empty()) return;
74
+ const W merged_total_weight = total_weight + other.get_total_weight(); // for correction at the end
75
+ for (auto &it: other.map) {
76
+ update(std::move(it.first), it.second);
77
+ }
78
+ offset += other.offset;
79
+ total_weight = merged_total_weight;
80
+ }
81
+
82
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
83
+ bool frequent_items_sketch<T, W, H, E, S, A>::is_empty() const {
84
+ return map.get_num_active() == 0;
85
+ }
86
+
87
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
88
+ uint32_t frequent_items_sketch<T, W, H, E, S, A>::get_num_active_items() const {
89
+ return map.get_num_active();
90
+ }
91
+
92
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
93
+ W frequent_items_sketch<T, W, H, E, S, A>::get_total_weight() const {
94
+ return total_weight;
95
+ }
96
+
97
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
98
+ W frequent_items_sketch<T, W, H, E, S, A>::get_estimate(const T& item) const {
99
+ // if item is tracked estimate = weight + offset, otherwise 0
100
+ const W weight = map.get(item);
101
+ if (weight > 0) return weight + offset;
102
+ return 0;
103
+ }
104
+
105
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
106
+ W frequent_items_sketch<T, W, H, E, S, A>::get_lower_bound(const T& item) const {
107
+ return map.get(item);
108
+ }
109
+
110
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
111
+ W frequent_items_sketch<T, W, H, E, S, A>::get_upper_bound(const T& item) const {
112
+ return map.get(item) + offset;
113
+ }
114
+
115
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
116
+ W frequent_items_sketch<T, W, H, E, S, A>::get_maximum_error() const {
117
+ return offset;
118
+ }
119
+
120
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
121
+ double frequent_items_sketch<T, W, H, E, S, A>::get_epsilon() const {
122
+ return EPSILON_FACTOR / (1 << map.get_lg_max_size());
123
+ }
124
+
125
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
126
+ double frequent_items_sketch<T, W, H, E, S, A>::get_epsilon(uint8_t lg_max_map_size) {
127
+ return EPSILON_FACTOR / (1 << lg_max_map_size);
128
+ }
129
+
130
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
131
+ double frequent_items_sketch<T, W, H, E, S, A>::get_apriori_error(uint8_t lg_max_map_size, W estimated_total_weight) {
132
+ return get_epsilon(lg_max_map_size) * estimated_total_weight;
133
+ }
134
+
135
+
136
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
137
+ typename frequent_items_sketch<T, W, H, E, S, A>::vector_row
138
+ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type) const {
139
+ return get_frequent_items(err_type, get_maximum_error());
140
+ }
141
+
142
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
143
+ typename frequent_items_sketch<T, W, H, E, S, A>::vector_row
144
+ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type, W threshold) const {
145
+ vector_row items;
146
+ for (auto &it: map) {
147
+ const W lb = it.second;
148
+ const W ub = it.second + offset;
149
+ if ((err_type == NO_FALSE_NEGATIVES && ub > threshold) || (err_type == NO_FALSE_POSITIVES && lb > threshold)) {
150
+ items.push_back(row(&it.first, it.second, offset));
151
+ }
152
+ }
153
+ // sort by estimate in descending order
154
+ std::sort(items.begin(), items.end(), [](row a, row b){ return a.get_estimate() > b.get_estimate(); });
155
+ return items;
156
+ }
157
+
158
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
159
+ void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const {
160
+ const uint8_t preamble_longs = is_empty() ? PREAMBLE_LONGS_EMPTY : PREAMBLE_LONGS_NONEMPTY;
161
+ os.write((char*)&preamble_longs, sizeof(preamble_longs));
162
+ const uint8_t serial_version = SERIAL_VERSION;
163
+ os.write((char*)&serial_version, sizeof(serial_version));
164
+ const uint8_t family = FAMILY_ID;
165
+ os.write((char*)&family, sizeof(family));
166
+ const uint8_t lg_max_size = map.get_lg_max_size();
167
+ os.write((char*)&lg_max_size, sizeof(lg_max_size));
168
+ const uint8_t lg_cur_size = map.get_lg_cur_size();
169
+ os.write((char*)&lg_cur_size, sizeof(lg_cur_size));
170
+ const uint8_t flags_byte(
171
+ (is_empty() ? 1 << flags::IS_EMPTY : 0)
172
+ );
173
+ os.write((char*)&flags_byte, sizeof(flags_byte));
174
+ const uint16_t unused16 = 0;
175
+ os.write((char*)&unused16, sizeof(unused16));
176
+ if (!is_empty()) {
177
+ const uint32_t num_items = map.get_num_active();
178
+ os.write((char*)&num_items, sizeof(num_items));
179
+ const uint32_t unused32 = 0;
180
+ os.write((char*)&unused32, sizeof(unused32));
181
+ os.write((char*)&total_weight, sizeof(total_weight));
182
+ os.write((char*)&offset, sizeof(offset));
183
+
184
+ // copy active items and their weights to use batch serialization
185
+ typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
186
+ W* weights = AllocW().allocate(num_items);
187
+ T* items = A().allocate(num_items);
188
+ uint32_t i = 0;
189
+ for (auto &it: map) {
190
+ new (&items[i]) T(it.first);
191
+ weights[i++] = it.second;
192
+ }
193
+ os.write((char*)weights, sizeof(W) * num_items);
194
+ AllocW().deallocate(weights, num_items);
195
+ S().serialize(os, items, num_items);
196
+ for (unsigned i = 0; i < num_items; i++) items[i].~T();
197
+ A().deallocate(items, num_items);
198
+ }
199
+ }
200
+
201
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
202
+ size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes() const {
203
+ if (is_empty()) return PREAMBLE_LONGS_EMPTY * sizeof(uint64_t);
204
+ size_t size = PREAMBLE_LONGS_NONEMPTY * sizeof(uint64_t) + map.get_num_active() * sizeof(W);
205
+ for (auto &it: map) size += S().size_of_item(it.first);
206
+ return size;
207
+ }
208
+
209
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
210
+ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const {
211
+ const size_t size = header_size_bytes + get_serialized_size_bytes();
212
+ vector_u8<A> bytes(size);
213
+ uint8_t* ptr = bytes.data() + header_size_bytes;
214
+ uint8_t* end_ptr = ptr + size;
215
+
216
+ const uint8_t preamble_longs = is_empty() ? PREAMBLE_LONGS_EMPTY : PREAMBLE_LONGS_NONEMPTY;
217
+ ptr += copy_to_mem(&preamble_longs, ptr, sizeof(uint8_t));
218
+ const uint8_t serial_version = SERIAL_VERSION;
219
+ ptr += copy_to_mem(&serial_version, ptr, sizeof(uint8_t));
220
+ const uint8_t family = FAMILY_ID;
221
+ ptr += copy_to_mem(&family, ptr, sizeof(uint8_t));
222
+ const uint8_t lg_max_size = map.get_lg_max_size();
223
+ ptr += copy_to_mem(&lg_max_size, ptr, sizeof(uint8_t));
224
+ const uint8_t lg_cur_size = map.get_lg_cur_size();
225
+ ptr += copy_to_mem(&lg_cur_size, ptr, sizeof(uint8_t));
226
+ const uint8_t flags_byte(
227
+ (is_empty() ? 1 << flags::IS_EMPTY : 0)
228
+ );
229
+ ptr += copy_to_mem(&flags_byte, ptr, sizeof(uint8_t));
230
+ const uint16_t unused16 = 0;
231
+ ptr += copy_to_mem(&unused16, ptr, sizeof(uint16_t));
232
+ if (!is_empty()) {
233
+ const uint32_t num_items = map.get_num_active();
234
+ ptr += copy_to_mem(&num_items, ptr, sizeof(uint32_t));
235
+ const uint32_t unused32 = 0;
236
+ ptr += copy_to_mem(&unused32, ptr, sizeof(uint32_t));
237
+ ptr += copy_to_mem(&total_weight, ptr, sizeof(total_weight));
238
+ ptr += copy_to_mem(&offset, ptr, sizeof(offset));
239
+
240
+ // copy active items and their weights to use batch serialization
241
+ typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
242
+ W* weights = AllocW().allocate(num_items);
243
+ T* items = A().allocate(num_items);
244
+ uint32_t i = 0;
245
+ for (auto &it: map) {
246
+ new (&items[i]) T(it.first);
247
+ weights[i++] = it.second;
248
+ }
249
+ ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items);
250
+ AllocW().deallocate(weights, num_items);
251
+ const size_t bytes_remaining = end_ptr - ptr;
252
+ ptr += S().serialize(ptr, bytes_remaining, items, num_items);
253
+ for (unsigned i = 0; i < num_items; i++) items[i].~T();
254
+ A().deallocate(items, num_items);
255
+ }
256
+ return bytes;
257
+ }
258
+
259
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
260
+ class frequent_items_sketch<T, W, H, E, S, A>::items_deleter {
261
+ public:
262
+ items_deleter(uint32_t num, bool destroy): num(num), destroy(destroy) {}
263
+ void set_destroy(bool destroy) { this->destroy = destroy; }
264
+ void operator() (T* ptr) const {
265
+ if (ptr != nullptr) {
266
+ if (destroy) {
267
+ for (uint32_t i = 0; i < num; ++i) ptr[i].~T();
268
+ }
269
+ A().deallocate(ptr, num);
270
+ }
271
+ }
272
+ private:
273
+ uint32_t num;
274
+ bool destroy;
275
+ };
276
+
277
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
278
+ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is) {
279
+ uint8_t preamble_longs;
280
+ is.read((char*)&preamble_longs, sizeof(preamble_longs));
281
+ uint8_t serial_version;
282
+ is.read((char*)&serial_version, sizeof(serial_version));
283
+ uint8_t family_id;
284
+ is.read((char*)&family_id, sizeof(family_id));
285
+ uint8_t lg_max_size;
286
+ is.read((char*)&lg_max_size, sizeof(lg_max_size));
287
+ uint8_t lg_cur_size;
288
+ is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
289
+ uint8_t flags_byte;
290
+ is.read((char*)&flags_byte, sizeof(flags_byte));
291
+ uint16_t unused16;
292
+ is.read((char*)&unused16, sizeof(unused16));
293
+
294
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
295
+
296
+ check_preamble_longs(preamble_longs, is_empty);
297
+ check_serial_version(serial_version);
298
+ check_family_id(family_id);
299
+ check_size(lg_cur_size, lg_max_size);
300
+
301
+ frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
302
+ if (!is_empty) {
303
+ uint32_t num_items;
304
+ is.read((char*)&num_items, sizeof(num_items));
305
+ uint32_t unused32;
306
+ is.read((char*)&unused32, sizeof(unused32));
307
+ W total_weight;
308
+ is.read((char*)&total_weight, sizeof(total_weight));
309
+ W offset;
310
+ is.read((char*)&offset, sizeof(offset));
311
+
312
+ // batch deserialization with intermediate array of items and weights
313
+ typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
314
+ std::vector<W, AllocW> weights(num_items);
315
+ is.read((char*)weights.data(), sizeof(W) * num_items);
316
+ std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
317
+ S().deserialize(is, items.get(), num_items);
318
+ items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
319
+ for (uint32_t i = 0; i < num_items; i++) {
320
+ sketch.update(std::move(items.get()[i]), weights[i]);
321
+ }
322
+ sketch.total_weight = total_weight;
323
+ sketch.offset = offset;
324
+ }
325
+ if (!is.good())
326
+ throw std::runtime_error("error reading from std::istream");
327
+ return sketch;
328
+ }
329
+
330
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
331
+ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size) {
332
+ ensure_minimum_memory(size, 8);
333
+ const char* ptr = static_cast<const char*>(bytes);
334
+ const char* base = static_cast<const char*>(bytes);
335
+ uint8_t preamble_longs;
336
+ ptr += copy_from_mem(ptr, &preamble_longs, sizeof(uint8_t));
337
+ uint8_t serial_version;
338
+ ptr += copy_from_mem(ptr, &serial_version, sizeof(uint8_t));
339
+ uint8_t family_id;
340
+ ptr += copy_from_mem(ptr, &family_id, sizeof(uint8_t));
341
+ uint8_t lg_max_size;
342
+ ptr += copy_from_mem(ptr, &lg_max_size, sizeof(uint8_t));
343
+ uint8_t lg_cur_size;
344
+ ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(uint8_t));
345
+ uint8_t flags_byte;
346
+ ptr += copy_from_mem(ptr, &flags_byte, sizeof(uint8_t));
347
+ uint16_t unused16;
348
+ ptr += copy_from_mem(ptr, &unused16, sizeof(uint16_t));
349
+
350
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
351
+
352
+ check_preamble_longs(preamble_longs, is_empty);
353
+ check_serial_version(serial_version);
354
+ check_family_id(family_id);
355
+ check_size(lg_cur_size, lg_max_size);
356
+ ensure_minimum_memory(size, 1 << preamble_longs);
357
+
358
+ frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
359
+ if (!is_empty) {
360
+ uint32_t num_items;
361
+ ptr += copy_from_mem(ptr, &num_items, sizeof(uint32_t));
362
+ uint32_t unused32;
363
+ ptr += copy_from_mem(ptr, &unused32, sizeof(uint32_t));
364
+ W total_weight;
365
+ ptr += copy_from_mem(ptr, &total_weight, sizeof(total_weight));
366
+ W offset;
367
+ ptr += copy_from_mem(ptr, &offset, sizeof(offset));
368
+
369
+ ensure_minimum_memory(size, ptr - base + (sizeof(W) * num_items));
370
+ // batch deserialization with intermediate array of items and weights
371
+ typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
372
+ std::vector<W, AllocW> weights(num_items);
373
+ ptr += copy_from_mem(ptr, weights.data(), sizeof(W) * num_items);
374
+ std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
375
+ const size_t bytes_remaining = size - (ptr - base);
376
+ ptr += S().deserialize(ptr, bytes_remaining, items.get(), num_items);
377
+ items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
378
+ for (uint32_t i = 0; i < num_items; i++) {
379
+ sketch.update(std::move(items.get()[i]), weights[i]);
380
+ }
381
+
382
+ sketch.total_weight = total_weight;
383
+ sketch.offset = offset;
384
+ }
385
+ return sketch;
386
+ }
387
+
388
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
389
+ void frequent_items_sketch<T, W, H, E, S, A>::check_preamble_longs(uint8_t preamble_longs, bool is_empty) {
390
+ if (is_empty) {
391
+ if (preamble_longs != PREAMBLE_LONGS_EMPTY) {
392
+ throw std::invalid_argument("Possible corruption: preamble longs of an empty sketch must be " + std::to_string(PREAMBLE_LONGS_EMPTY) + ": " + std::to_string(preamble_longs));
393
+ }
394
+ } else {
395
+ if (preamble_longs != PREAMBLE_LONGS_NONEMPTY) {
396
+ throw std::invalid_argument("Possible corruption: preamble longs of an non-empty sketch must be " + std::to_string(PREAMBLE_LONGS_NONEMPTY) + ": " + std::to_string(preamble_longs));
397
+ }
398
+ }
399
+ }
400
+
401
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
402
+ void frequent_items_sketch<T, W, H, E, S, A>::check_serial_version(uint8_t serial_version) {
403
+ if (serial_version != SERIAL_VERSION) {
404
+ throw std::invalid_argument("Possible corruption: serial version must be " + std::to_string(SERIAL_VERSION) + ": " + std::to_string(serial_version));
405
+ }
406
+ }
407
+
408
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
409
+ void frequent_items_sketch<T, W, H, E, S, A>::check_family_id(uint8_t family_id) {
410
+ if (family_id != FAMILY_ID) {
411
+ throw std::invalid_argument("Possible corruption: family ID must be " + std::to_string(FAMILY_ID) + ": " + std::to_string(family_id));
412
+ }
413
+ }
414
+
415
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
416
+ void frequent_items_sketch<T, W, H, E, S, A>::check_size(uint8_t lg_cur_size, uint8_t lg_max_size) {
417
+ if (lg_cur_size > lg_max_size) {
418
+ throw std::invalid_argument("Possible corruption: expected lg_cur_size <= lg_max_size: " + std::to_string(lg_cur_size) + " <= " + std::to_string(lg_max_size));
419
+ }
420
+ if (lg_cur_size < LG_MIN_MAP_SIZE) {
421
+ throw std::invalid_argument("Possible corruption: lg_cur_size must not be less than " + std::to_string(LG_MIN_MAP_SIZE) + ": " + std::to_string(lg_cur_size));
422
+ }
423
+ }
424
+
425
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
426
+ string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) const {
427
+ std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
428
+ os << "### Frequent items sketch summary:" << std::endl;
429
+ os << " lg cur map size : " << (int) map.get_lg_cur_size() << std::endl;
430
+ os << " lg max map size : " << (int) map.get_lg_max_size() << std::endl;
431
+ os << " num active items : " << get_num_active_items() << std::endl;
432
+ os << " total weight : " << get_total_weight() << std::endl;
433
+ os << " max error : " << get_maximum_error() << std::endl;
434
+ os << "### End sketch summary" << std::endl;
435
+ if (print_items) {
436
+ vector_row items;
437
+ for (auto &it: map) {
438
+ items.push_back(row(&it.first, it.second, offset));
439
+ }
440
+ // sort by estimate in descending order
441
+ std::sort(items.begin(), items.end(), [](row a, row b){ return a.get_estimate() > b.get_estimate(); });
442
+ os << "### Items in descending order by estimate" << std::endl;
443
+ os << " item, estimate, lower bound, upper bound" << std::endl;
444
+ for (auto &it: items) {
445
+ os << " " << it.get_item() << ", " << it.get_estimate() << ", "
446
+ << it.get_lower_bound() << ", " << it.get_upper_bound() << std::endl;
447
+ }
448
+ os << "### End items" << std::endl;
449
+ }
450
+ return os.str();
451
+ }
452
+
453
+ // version for integral signed type
454
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
455
+ template<typename WW, typename std::enable_if<std::is_integral<WW>::value && std::is_signed<WW>::value, int>::type>
456
+ void frequent_items_sketch<T, W, H, E, S, A>::check_weight(WW weight) {
457
+ if (weight < 0) {
458
+ throw std::invalid_argument("weight must be non-negative");
459
+ }
460
+ }
461
+
462
+ // version for integral unsigned type - no-op
463
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
464
+ template<typename WW, typename std::enable_if<std::is_integral<WW>::value && std::is_unsigned<WW>::value, int>::type>
465
+ void frequent_items_sketch<T, W, H, E, S, A>::check_weight(WW) {}
466
+
467
+ // version for floating point type
468
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
469
+ template<typename WW, typename std::enable_if<std::is_floating_point<WW>::value, int>::type>
470
+ void frequent_items_sketch<T, W, H, E, S, A>::check_weight(WW weight) {
471
+ if (weight < 0) {
472
+ throw std::invalid_argument("weight must be non-negative");
473
+ }
474
+ if (std::isnan(weight)) {
475
+ throw std::invalid_argument("weight must be a valid number");
476
+ }
477
+ if (std::isinf(weight)) {
478
+ throw std::invalid_argument("weight must be finite");
479
+ }
480
+ }
481
+
482
+ }
483
+
484
+ #endif