datasketches 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,484 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef FREQUENT_ITEMS_SKETCH_IMPL_HPP_
21
+ #define FREQUENT_ITEMS_SKETCH_IMPL_HPP_
22
+
23
+ #include <cstring>
24
+ #include <limits>
25
+ #include <sstream>
26
+
27
+ #include "memory_operations.hpp"
28
+
29
+ namespace datasketches {
30
+
31
+ // clang++ seems to require this out-of-class definition for CMAKE_BUILD_TYPE="Debug"
32
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
33
+ const uint8_t frequent_items_sketch<T, W, H, E, S, A>::LG_MIN_MAP_SIZE;
34
+
35
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
36
+ frequent_items_sketch<T, W, H, E, S, A>::frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size):
37
+ total_weight(0),
38
+ offset(0),
39
+ map(std::max(lg_start_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE), std::max(lg_max_map_size, frequent_items_sketch::LG_MIN_MAP_SIZE))
40
+ {
41
+ if (lg_start_map_size > lg_max_map_size) throw std::invalid_argument("starting size must not be greater than maximum size");
42
+ }
43
+
44
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
45
+ void frequent_items_sketch<T, W, H, E, S, A>::update(const T& item, W weight) {
46
+ check_weight(weight);
47
+ if (weight == 0) return;
48
+ total_weight += weight;
49
+ offset += map.adjust_or_insert(item, weight);
50
+ }
51
+
52
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
53
+ void frequent_items_sketch<T, W, H, E, S, A>::update(T&& item, W weight) {
54
+ check_weight(weight);
55
+ if (weight == 0) return;
56
+ total_weight += weight;
57
+ offset += map.adjust_or_insert(std::move(item), weight);
58
+ }
59
+
60
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
61
+ void frequent_items_sketch<T, W, H, E, S, A>::merge(const frequent_items_sketch& other) {
62
+ if (other.is_empty()) return;
63
+ const W merged_total_weight = total_weight + other.get_total_weight(); // for correction at the end
64
+ for (auto &it: other.map) {
65
+ update(it.first, it.second);
66
+ }
67
+ offset += other.offset;
68
+ total_weight = merged_total_weight;
69
+ }
70
+
71
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
72
+ void frequent_items_sketch<T, W, H, E, S, A>::merge(frequent_items_sketch&& other) {
73
+ if (other.is_empty()) return;
74
+ const W merged_total_weight = total_weight + other.get_total_weight(); // for correction at the end
75
+ for (auto &it: other.map) {
76
+ update(std::move(it.first), it.second);
77
+ }
78
+ offset += other.offset;
79
+ total_weight = merged_total_weight;
80
+ }
81
+
82
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
83
+ bool frequent_items_sketch<T, W, H, E, S, A>::is_empty() const {
84
+ return map.get_num_active() == 0;
85
+ }
86
+
87
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
88
+ uint32_t frequent_items_sketch<T, W, H, E, S, A>::get_num_active_items() const {
89
+ return map.get_num_active();
90
+ }
91
+
92
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
93
+ W frequent_items_sketch<T, W, H, E, S, A>::get_total_weight() const {
94
+ return total_weight;
95
+ }
96
+
97
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
98
+ W frequent_items_sketch<T, W, H, E, S, A>::get_estimate(const T& item) const {
99
+ // if item is tracked estimate = weight + offset, otherwise 0
100
+ const W weight = map.get(item);
101
+ if (weight > 0) return weight + offset;
102
+ return 0;
103
+ }
104
+
105
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
106
+ W frequent_items_sketch<T, W, H, E, S, A>::get_lower_bound(const T& item) const {
107
+ return map.get(item);
108
+ }
109
+
110
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
111
+ W frequent_items_sketch<T, W, H, E, S, A>::get_upper_bound(const T& item) const {
112
+ return map.get(item) + offset;
113
+ }
114
+
115
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
116
+ W frequent_items_sketch<T, W, H, E, S, A>::get_maximum_error() const {
117
+ return offset;
118
+ }
119
+
120
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
121
+ double frequent_items_sketch<T, W, H, E, S, A>::get_epsilon() const {
122
+ return EPSILON_FACTOR / (1 << map.get_lg_max_size());
123
+ }
124
+
125
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
126
+ double frequent_items_sketch<T, W, H, E, S, A>::get_epsilon(uint8_t lg_max_map_size) {
127
+ return EPSILON_FACTOR / (1 << lg_max_map_size);
128
+ }
129
+
130
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
131
+ double frequent_items_sketch<T, W, H, E, S, A>::get_apriori_error(uint8_t lg_max_map_size, W estimated_total_weight) {
132
+ return get_epsilon(lg_max_map_size) * estimated_total_weight;
133
+ }
134
+
135
+
136
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
137
+ typename frequent_items_sketch<T, W, H, E, S, A>::vector_row
138
+ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type) const {
139
+ return get_frequent_items(err_type, get_maximum_error());
140
+ }
141
+
142
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
143
+ typename frequent_items_sketch<T, W, H, E, S, A>::vector_row
144
+ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type, W threshold) const {
145
+ vector_row items;
146
+ for (auto &it: map) {
147
+ const W lb = it.second;
148
+ const W ub = it.second + offset;
149
+ if ((err_type == NO_FALSE_NEGATIVES && ub > threshold) || (err_type == NO_FALSE_POSITIVES && lb > threshold)) {
150
+ items.push_back(row(&it.first, it.second, offset));
151
+ }
152
+ }
153
+ // sort by estimate in descending order
154
+ std::sort(items.begin(), items.end(), [](row a, row b){ return a.get_estimate() > b.get_estimate(); });
155
+ return items;
156
+ }
157
+
158
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
159
+ void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const {
160
+ const uint8_t preamble_longs = is_empty() ? PREAMBLE_LONGS_EMPTY : PREAMBLE_LONGS_NONEMPTY;
161
+ os.write((char*)&preamble_longs, sizeof(preamble_longs));
162
+ const uint8_t serial_version = SERIAL_VERSION;
163
+ os.write((char*)&serial_version, sizeof(serial_version));
164
+ const uint8_t family = FAMILY_ID;
165
+ os.write((char*)&family, sizeof(family));
166
+ const uint8_t lg_max_size = map.get_lg_max_size();
167
+ os.write((char*)&lg_max_size, sizeof(lg_max_size));
168
+ const uint8_t lg_cur_size = map.get_lg_cur_size();
169
+ os.write((char*)&lg_cur_size, sizeof(lg_cur_size));
170
+ const uint8_t flags_byte(
171
+ (is_empty() ? 1 << flags::IS_EMPTY : 0)
172
+ );
173
+ os.write((char*)&flags_byte, sizeof(flags_byte));
174
+ const uint16_t unused16 = 0;
175
+ os.write((char*)&unused16, sizeof(unused16));
176
+ if (!is_empty()) {
177
+ const uint32_t num_items = map.get_num_active();
178
+ os.write((char*)&num_items, sizeof(num_items));
179
+ const uint32_t unused32 = 0;
180
+ os.write((char*)&unused32, sizeof(unused32));
181
+ os.write((char*)&total_weight, sizeof(total_weight));
182
+ os.write((char*)&offset, sizeof(offset));
183
+
184
+ // copy active items and their weights to use batch serialization
185
+ typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
186
+ W* weights = AllocW().allocate(num_items);
187
+ T* items = A().allocate(num_items);
188
+ uint32_t i = 0;
189
+ for (auto &it: map) {
190
+ new (&items[i]) T(it.first);
191
+ weights[i++] = it.second;
192
+ }
193
+ os.write((char*)weights, sizeof(W) * num_items);
194
+ AllocW().deallocate(weights, num_items);
195
+ S().serialize(os, items, num_items);
196
+ for (unsigned i = 0; i < num_items; i++) items[i].~T();
197
+ A().deallocate(items, num_items);
198
+ }
199
+ }
200
+
201
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
202
+ size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes() const {
203
+ if (is_empty()) return PREAMBLE_LONGS_EMPTY * sizeof(uint64_t);
204
+ size_t size = PREAMBLE_LONGS_NONEMPTY * sizeof(uint64_t) + map.get_num_active() * sizeof(W);
205
+ for (auto &it: map) size += S().size_of_item(it.first);
206
+ return size;
207
+ }
208
+
209
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
210
+ vector_u8<A> frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const {
211
+ const size_t size = header_size_bytes + get_serialized_size_bytes();
212
+ vector_u8<A> bytes(size);
213
+ uint8_t* ptr = bytes.data() + header_size_bytes;
214
+ uint8_t* end_ptr = ptr + size;
215
+
216
+ const uint8_t preamble_longs = is_empty() ? PREAMBLE_LONGS_EMPTY : PREAMBLE_LONGS_NONEMPTY;
217
+ ptr += copy_to_mem(&preamble_longs, ptr, sizeof(uint8_t));
218
+ const uint8_t serial_version = SERIAL_VERSION;
219
+ ptr += copy_to_mem(&serial_version, ptr, sizeof(uint8_t));
220
+ const uint8_t family = FAMILY_ID;
221
+ ptr += copy_to_mem(&family, ptr, sizeof(uint8_t));
222
+ const uint8_t lg_max_size = map.get_lg_max_size();
223
+ ptr += copy_to_mem(&lg_max_size, ptr, sizeof(uint8_t));
224
+ const uint8_t lg_cur_size = map.get_lg_cur_size();
225
+ ptr += copy_to_mem(&lg_cur_size, ptr, sizeof(uint8_t));
226
+ const uint8_t flags_byte(
227
+ (is_empty() ? 1 << flags::IS_EMPTY : 0)
228
+ );
229
+ ptr += copy_to_mem(&flags_byte, ptr, sizeof(uint8_t));
230
+ const uint16_t unused16 = 0;
231
+ ptr += copy_to_mem(&unused16, ptr, sizeof(uint16_t));
232
+ if (!is_empty()) {
233
+ const uint32_t num_items = map.get_num_active();
234
+ ptr += copy_to_mem(&num_items, ptr, sizeof(uint32_t));
235
+ const uint32_t unused32 = 0;
236
+ ptr += copy_to_mem(&unused32, ptr, sizeof(uint32_t));
237
+ ptr += copy_to_mem(&total_weight, ptr, sizeof(total_weight));
238
+ ptr += copy_to_mem(&offset, ptr, sizeof(offset));
239
+
240
+ // copy active items and their weights to use batch serialization
241
+ typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
242
+ W* weights = AllocW().allocate(num_items);
243
+ T* items = A().allocate(num_items);
244
+ uint32_t i = 0;
245
+ for (auto &it: map) {
246
+ new (&items[i]) T(it.first);
247
+ weights[i++] = it.second;
248
+ }
249
+ ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items);
250
+ AllocW().deallocate(weights, num_items);
251
+ const size_t bytes_remaining = end_ptr - ptr;
252
+ ptr += S().serialize(ptr, bytes_remaining, items, num_items);
253
+ for (unsigned i = 0; i < num_items; i++) items[i].~T();
254
+ A().deallocate(items, num_items);
255
+ }
256
+ return bytes;
257
+ }
258
+
259
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
260
+ class frequent_items_sketch<T, W, H, E, S, A>::items_deleter {
261
+ public:
262
+ items_deleter(uint32_t num, bool destroy): num(num), destroy(destroy) {}
263
+ void set_destroy(bool destroy) { this->destroy = destroy; }
264
+ void operator() (T* ptr) const {
265
+ if (ptr != nullptr) {
266
+ if (destroy) {
267
+ for (uint32_t i = 0; i < num; ++i) ptr[i].~T();
268
+ }
269
+ A().deallocate(ptr, num);
270
+ }
271
+ }
272
+ private:
273
+ uint32_t num;
274
+ bool destroy;
275
+ };
276
+
277
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
278
+ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is) {
279
+ uint8_t preamble_longs;
280
+ is.read((char*)&preamble_longs, sizeof(preamble_longs));
281
+ uint8_t serial_version;
282
+ is.read((char*)&serial_version, sizeof(serial_version));
283
+ uint8_t family_id;
284
+ is.read((char*)&family_id, sizeof(family_id));
285
+ uint8_t lg_max_size;
286
+ is.read((char*)&lg_max_size, sizeof(lg_max_size));
287
+ uint8_t lg_cur_size;
288
+ is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
289
+ uint8_t flags_byte;
290
+ is.read((char*)&flags_byte, sizeof(flags_byte));
291
+ uint16_t unused16;
292
+ is.read((char*)&unused16, sizeof(unused16));
293
+
294
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
295
+
296
+ check_preamble_longs(preamble_longs, is_empty);
297
+ check_serial_version(serial_version);
298
+ check_family_id(family_id);
299
+ check_size(lg_cur_size, lg_max_size);
300
+
301
+ frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
302
+ if (!is_empty) {
303
+ uint32_t num_items;
304
+ is.read((char*)&num_items, sizeof(num_items));
305
+ uint32_t unused32;
306
+ is.read((char*)&unused32, sizeof(unused32));
307
+ W total_weight;
308
+ is.read((char*)&total_weight, sizeof(total_weight));
309
+ W offset;
310
+ is.read((char*)&offset, sizeof(offset));
311
+
312
+ // batch deserialization with intermediate array of items and weights
313
+ typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
314
+ std::vector<W, AllocW> weights(num_items);
315
+ is.read((char*)weights.data(), sizeof(W) * num_items);
316
+ std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
317
+ S().deserialize(is, items.get(), num_items);
318
+ items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
319
+ for (uint32_t i = 0; i < num_items; i++) {
320
+ sketch.update(std::move(items.get()[i]), weights[i]);
321
+ }
322
+ sketch.total_weight = total_weight;
323
+ sketch.offset = offset;
324
+ }
325
+ if (!is.good())
326
+ throw std::runtime_error("error reading from std::istream");
327
+ return sketch;
328
+ }
329
+
330
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
331
+ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size) {
332
+ ensure_minimum_memory(size, 8);
333
+ const char* ptr = static_cast<const char*>(bytes);
334
+ const char* base = static_cast<const char*>(bytes);
335
+ uint8_t preamble_longs;
336
+ ptr += copy_from_mem(ptr, &preamble_longs, sizeof(uint8_t));
337
+ uint8_t serial_version;
338
+ ptr += copy_from_mem(ptr, &serial_version, sizeof(uint8_t));
339
+ uint8_t family_id;
340
+ ptr += copy_from_mem(ptr, &family_id, sizeof(uint8_t));
341
+ uint8_t lg_max_size;
342
+ ptr += copy_from_mem(ptr, &lg_max_size, sizeof(uint8_t));
343
+ uint8_t lg_cur_size;
344
+ ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(uint8_t));
345
+ uint8_t flags_byte;
346
+ ptr += copy_from_mem(ptr, &flags_byte, sizeof(uint8_t));
347
+ uint16_t unused16;
348
+ ptr += copy_from_mem(ptr, &unused16, sizeof(uint16_t));
349
+
350
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
351
+
352
+ check_preamble_longs(preamble_longs, is_empty);
353
+ check_serial_version(serial_version);
354
+ check_family_id(family_id);
355
+ check_size(lg_cur_size, lg_max_size);
356
+ ensure_minimum_memory(size, 1 << preamble_longs);
357
+
358
+ frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size);
359
+ if (!is_empty) {
360
+ uint32_t num_items;
361
+ ptr += copy_from_mem(ptr, &num_items, sizeof(uint32_t));
362
+ uint32_t unused32;
363
+ ptr += copy_from_mem(ptr, &unused32, sizeof(uint32_t));
364
+ W total_weight;
365
+ ptr += copy_from_mem(ptr, &total_weight, sizeof(total_weight));
366
+ W offset;
367
+ ptr += copy_from_mem(ptr, &offset, sizeof(offset));
368
+
369
+ ensure_minimum_memory(size, ptr - base + (sizeof(W) * num_items));
370
+ // batch deserialization with intermediate array of items and weights
371
+ typedef typename std::allocator_traits<A>::template rebind_alloc<W> AllocW;
372
+ std::vector<W, AllocW> weights(num_items);
373
+ ptr += copy_from_mem(ptr, weights.data(), sizeof(W) * num_items);
374
+ std::unique_ptr<T, items_deleter> items(A().allocate(num_items), items_deleter(num_items, false));
375
+ const size_t bytes_remaining = size - (ptr - base);
376
+ ptr += S().deserialize(ptr, bytes_remaining, items.get(), num_items);
377
+ items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
378
+ for (uint32_t i = 0; i < num_items; i++) {
379
+ sketch.update(std::move(items.get()[i]), weights[i]);
380
+ }
381
+
382
+ sketch.total_weight = total_weight;
383
+ sketch.offset = offset;
384
+ }
385
+ return sketch;
386
+ }
387
+
388
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
389
+ void frequent_items_sketch<T, W, H, E, S, A>::check_preamble_longs(uint8_t preamble_longs, bool is_empty) {
390
+ if (is_empty) {
391
+ if (preamble_longs != PREAMBLE_LONGS_EMPTY) {
392
+ throw std::invalid_argument("Possible corruption: preamble longs of an empty sketch must be " + std::to_string(PREAMBLE_LONGS_EMPTY) + ": " + std::to_string(preamble_longs));
393
+ }
394
+ } else {
395
+ if (preamble_longs != PREAMBLE_LONGS_NONEMPTY) {
396
+ throw std::invalid_argument("Possible corruption: preamble longs of an non-empty sketch must be " + std::to_string(PREAMBLE_LONGS_NONEMPTY) + ": " + std::to_string(preamble_longs));
397
+ }
398
+ }
399
+ }
400
+
401
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
402
+ void frequent_items_sketch<T, W, H, E, S, A>::check_serial_version(uint8_t serial_version) {
403
+ if (serial_version != SERIAL_VERSION) {
404
+ throw std::invalid_argument("Possible corruption: serial version must be " + std::to_string(SERIAL_VERSION) + ": " + std::to_string(serial_version));
405
+ }
406
+ }
407
+
408
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
409
+ void frequent_items_sketch<T, W, H, E, S, A>::check_family_id(uint8_t family_id) {
410
+ if (family_id != FAMILY_ID) {
411
+ throw std::invalid_argument("Possible corruption: family ID must be " + std::to_string(FAMILY_ID) + ": " + std::to_string(family_id));
412
+ }
413
+ }
414
+
415
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
416
+ void frequent_items_sketch<T, W, H, E, S, A>::check_size(uint8_t lg_cur_size, uint8_t lg_max_size) {
417
+ if (lg_cur_size > lg_max_size) {
418
+ throw std::invalid_argument("Possible corruption: expected lg_cur_size <= lg_max_size: " + std::to_string(lg_cur_size) + " <= " + std::to_string(lg_max_size));
419
+ }
420
+ if (lg_cur_size < LG_MIN_MAP_SIZE) {
421
+ throw std::invalid_argument("Possible corruption: lg_cur_size must not be less than " + std::to_string(LG_MIN_MAP_SIZE) + ": " + std::to_string(lg_cur_size));
422
+ }
423
+ }
424
+
425
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
426
+ string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) const {
427
+ std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
428
+ os << "### Frequent items sketch summary:" << std::endl;
429
+ os << " lg cur map size : " << (int) map.get_lg_cur_size() << std::endl;
430
+ os << " lg max map size : " << (int) map.get_lg_max_size() << std::endl;
431
+ os << " num active items : " << get_num_active_items() << std::endl;
432
+ os << " total weight : " << get_total_weight() << std::endl;
433
+ os << " max error : " << get_maximum_error() << std::endl;
434
+ os << "### End sketch summary" << std::endl;
435
+ if (print_items) {
436
+ vector_row items;
437
+ for (auto &it: map) {
438
+ items.push_back(row(&it.first, it.second, offset));
439
+ }
440
+ // sort by estimate in descending order
441
+ std::sort(items.begin(), items.end(), [](row a, row b){ return a.get_estimate() > b.get_estimate(); });
442
+ os << "### Items in descending order by estimate" << std::endl;
443
+ os << " item, estimate, lower bound, upper bound" << std::endl;
444
+ for (auto &it: items) {
445
+ os << " " << it.get_item() << ", " << it.get_estimate() << ", "
446
+ << it.get_lower_bound() << ", " << it.get_upper_bound() << std::endl;
447
+ }
448
+ os << "### End items" << std::endl;
449
+ }
450
+ return os.str();
451
+ }
452
+
453
+ // version for integral signed type
454
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
455
+ template<typename WW, typename std::enable_if<std::is_integral<WW>::value && std::is_signed<WW>::value, int>::type>
456
+ void frequent_items_sketch<T, W, H, E, S, A>::check_weight(WW weight) {
457
+ if (weight < 0) {
458
+ throw std::invalid_argument("weight must be non-negative");
459
+ }
460
+ }
461
+
462
+ // version for integral unsigned type - no-op
463
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
464
+ template<typename WW, typename std::enable_if<std::is_integral<WW>::value && std::is_unsigned<WW>::value, int>::type>
465
+ void frequent_items_sketch<T, W, H, E, S, A>::check_weight(WW) {}
466
+
467
+ // version for floating point type
468
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
469
+ template<typename WW, typename std::enable_if<std::is_floating_point<WW>::value, int>::type>
470
+ void frequent_items_sketch<T, W, H, E, S, A>::check_weight(WW weight) {
471
+ if (weight < 0) {
472
+ throw std::invalid_argument("weight must be non-negative");
473
+ }
474
+ if (std::isnan(weight)) {
475
+ throw std::invalid_argument("weight must be a valid number");
476
+ }
477
+ if (std::isinf(weight)) {
478
+ throw std::invalid_argument("weight must be finite");
479
+ }
480
+ }
481
+
482
+ }
483
+
484
+ #endif