datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,70 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef CONDITIONAL_FORWARD_HPP_
21
+ #define CONDITIONAL_FORWARD_HPP_
22
+
23
+ #include <type_traits>
24
+
25
namespace datasketches {

// fwd_type<T1, T2> picks the type used to pass on a value of type T2,
// driven by T1:
//   - T1 is an lvalue reference -> T2, unchanged
//   - anything else             -> rvalue reference to T2 with its reference removed
template<typename T1, typename T2>
using fwd_type = typename std::conditional<
  !std::is_lvalue_reference<T1>::value,
  typename std::remove_reference<T2>::type&&,
  T2
>::type;

// Forwards value as fwd_type<T1, T2>.
// T1 has to be supplied explicitly by the caller; T2 is deduced from the argument.
template<typename T1, typename T2>
fwd_type<T1, T2> conditional_forward(T2&& value) {
  using result_type = fwd_type<T1, T2>;
  return std::forward<result_type>(std::forward<T2>(value));
}

// Iterator access that follows the value category of the container argument:
// a container bound as an lvalue yields its own iterators, anything else
// yields the same iterators wrapped by std::make_move_iterator.

// forward_begin, lvalue case: returns c.begin() as is.
template<typename Container>
auto forward_begin(Container&& c)
  -> typename std::enable_if<
       std::is_lvalue_reference<Container>::value,
       decltype(c.begin())
     >::type
{
  return c.begin();
}

// forward_begin, non-lvalue case: returns c.begin() wrapped in a move iterator.
template<typename Container>
auto forward_begin(Container&& c)
  -> typename std::enable_if<
       !std::is_lvalue_reference<Container>::value,
       decltype(std::make_move_iterator(c.begin()))
     >::type
{
  auto first = c.begin();
  return std::make_move_iterator(first);
}

// forward_end, lvalue case: returns c.end() as is.
template<typename Container>
auto forward_end(Container&& c)
  -> typename std::enable_if<
       std::is_lvalue_reference<Container>::value,
       decltype(c.end())
     >::type
{
  return c.end();
}

// forward_end, non-lvalue case: returns c.end() wrapped in a move iterator.
template<typename Container>
auto forward_end(Container&& c)
  -> typename std::enable_if<
       !std::is_lvalue_reference<Container>::value,
       decltype(std::make_move_iterator(c.end()))
     >::type
{
  auto last = c.end();
  return std::make_move_iterator(last);
}

} /* namespace datasketches */
69
+
70
+ #endif
@@ -0,0 +1,114 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _COUNT_ZEROS_HPP_
21
+ #define _COUNT_ZEROS_HPP_
22
+
23
+ #include <cstdint>
24
+
25
+ #include <stdio.h>
26
+
27
namespace datasketches {

// byte_leading_zeros_table[b] is the number of leading zero bits in the
// 8-bit value b (most significant bit first); the entry for 0 is 8.
static const uint8_t byte_leading_zeros_table[256] = {
  8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

// byte_trailing_zeros_table[b] is the number of trailing zero bits in the
// 8-bit value b (least significant bit first); the entry for 0 is 8.
static const uint8_t byte_trailing_zeros_table[256] = {
  8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
  4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};

// FCLZ_MASK_nn has the low nn bits set. A value greater than FCLZ_MASK_nn
// therefore has at least one bit set at position nn or above.
static const uint64_t FCLZ_MASK_56 = 0x00ffffffffffffff;
static const uint64_t FCLZ_MASK_48 = 0x0000ffffffffffff;
static const uint64_t FCLZ_MASK_40 = 0x000000ffffffffff;
static const uint64_t FCLZ_MASK_32 = 0x00000000ffffffff;
static const uint64_t FCLZ_MASK_24 = 0x0000000000ffffff;
static const uint64_t FCLZ_MASK_16 = 0x000000000000ffff;
static const uint64_t FCLZ_MASK_08 = 0x00000000000000ff;

// Returns the number of leading zero bits in input (64 when input is 0).
// The comparisons locate the most significant non-zero byte from the top down;
// the result is 8 bits for every all-zero byte above it plus the table value
// for that byte.
static inline uint8_t count_leading_zeros_in_u64(uint64_t input) {
  if (input > FCLZ_MASK_56)
    return byte_leading_zeros_table[(input >> 56) & FCLZ_MASK_08];
  if (input > FCLZ_MASK_48)
    return static_cast<uint8_t>(8 + byte_leading_zeros_table[(input >> 48) & FCLZ_MASK_08]);
  if (input > FCLZ_MASK_40)
    return static_cast<uint8_t>(16 + byte_leading_zeros_table[(input >> 40) & FCLZ_MASK_08]);
  if (input > FCLZ_MASK_32)
    return static_cast<uint8_t>(24 + byte_leading_zeros_table[(input >> 32) & FCLZ_MASK_08]);
  if (input > FCLZ_MASK_24)
    return static_cast<uint8_t>(32 + byte_leading_zeros_table[(input >> 24) & FCLZ_MASK_08]);
  if (input > FCLZ_MASK_16)
    return static_cast<uint8_t>(40 + byte_leading_zeros_table[(input >> 16) & FCLZ_MASK_08]);
  if (input > FCLZ_MASK_08)
    return static_cast<uint8_t>(48 + byte_leading_zeros_table[(input >> 8) & FCLZ_MASK_08]);
  // Only the lowest byte can be non-zero here. Returning unconditionally
  // leaves no path that falls off the end of the function.
  return static_cast<uint8_t>(56 + byte_leading_zeros_table[input & FCLZ_MASK_08]);
}

// Returns the number of trailing zero bits in input (32 when input is 0).
// Scans bytes from least to most significant; i << 3 accounts for the 8 zero
// bits of every all-zero byte already skipped.
static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
  for (int i = 0; i < 4; i++) {
    const int byte = input & 0xff;
    if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
    input >>= 8;
  }
  return 32;
}

// Returns the number of trailing zero bits in input (64 when input is 0).
// Same byte-wise scan as the 32-bit version, over 8 bytes.
static inline uint8_t count_trailing_zeros_in_u64(uint64_t input) {
  for (int i = 0; i < 8; i++) {
    const int byte = input & 0xff;
    if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
    input >>= 8;
  }
  return 64;
}

} /* namespace datasketches */
113
+
114
+ #endif // _COUNT_ZEROS_HPP_
@@ -0,0 +1,107 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef INV_POW_2_TAB_HPP_
21
+ #define INV_POW_2_TAB_HPP_
22
+
23
+ #include <cmath>
24
+
25
+ namespace datasketches {
26
+
27
+ // the table was created by the following procedure:
28
+
29
+ //void fill_inverse_power_of_2_table() {
30
+ // for (int i = 0; i < 256; i++) {
31
+ // inverse_powers_of_2_table[i] = pow(2.0, (-1.0 * ((double) i)));
32
+ // printf("%.17g", inverse_powers_of_2_table[i]);
33
+ // if (i != 255) printf(", ");
34
+ // if ((i + 1) % 4 == 0) printf("\n");
35
+ // }
36
+ //}
37
+
38
// INVERSE_POWERS_OF_2[i] is 2^-i stored as a double, for i = 0..255
// (1, 0.5, 0.25, ... down to 2^-255, about 1.727e-77).
// The table has 64 rows of 4 entries, in increasing order of i.
+ static const double INVERSE_POWERS_OF_2[256] = {
39
+ 1, 0.5, 0.25, 0.125,
40
+ 0.0625, 0.03125, 0.015625, 0.0078125,
41
+ 0.00390625, 0.001953125, 0.0009765625, 0.00048828125,
42
+ 0.000244140625, 0.0001220703125, 6.103515625e-05, 3.0517578125e-05,
43
+ 1.52587890625e-05, 7.62939453125e-06, 3.814697265625e-06, 1.9073486328125e-06,
44
+ 9.5367431640625e-07, 4.76837158203125e-07, 2.384185791015625e-07, 1.1920928955078125e-07,
45
+ 5.9604644775390625e-08, 2.9802322387695312e-08, 1.4901161193847656e-08, 7.4505805969238281e-09,
46
+ 3.7252902984619141e-09, 1.862645149230957e-09, 9.3132257461547852e-10, 4.6566128730773926e-10,
47
+ 2.3283064365386963e-10, 1.1641532182693481e-10, 5.8207660913467407e-11, 2.9103830456733704e-11,
48
+ 1.4551915228366852e-11, 7.2759576141834259e-12, 3.637978807091713e-12, 1.8189894035458565e-12,
49
+ 9.0949470177292824e-13, 4.5474735088646412e-13, 2.2737367544323206e-13, 1.1368683772161603e-13,
50
+ 5.6843418860808015e-14, 2.8421709430404007e-14, 1.4210854715202004e-14, 7.1054273576010019e-15,
51
+ 3.5527136788005009e-15, 1.7763568394002505e-15, 8.8817841970012523e-16, 4.4408920985006262e-16,
52
+ 2.2204460492503131e-16, 1.1102230246251565e-16, 5.5511151231257827e-17, 2.7755575615628914e-17,
53
+ 1.3877787807814457e-17, 6.9388939039072284e-18, 3.4694469519536142e-18, 1.7347234759768071e-18,
54
+ 8.6736173798840355e-19, 4.3368086899420177e-19, 2.1684043449710089e-19, 1.0842021724855044e-19,
55
+ 5.4210108624275222e-20, 2.7105054312137611e-20, 1.3552527156068805e-20, 6.7762635780344027e-21,
56
+ 3.3881317890172014e-21, 1.6940658945086007e-21, 8.4703294725430034e-22, 4.2351647362715017e-22,
57
+ 2.1175823681357508e-22, 1.0587911840678754e-22, 5.2939559203393771e-23, 2.6469779601696886e-23,
58
+ 1.3234889800848443e-23, 6.6174449004242214e-24, 3.3087224502121107e-24, 1.6543612251060553e-24,
59
+ 8.2718061255302767e-25, 4.1359030627651384e-25, 2.0679515313825692e-25, 1.0339757656912846e-25,
60
+ 5.169878828456423e-26, 2.5849394142282115e-26, 1.2924697071141057e-26, 6.4623485355705287e-27,
61
+ 3.2311742677852644e-27, 1.6155871338926322e-27, 8.0779356694631609e-28, 4.0389678347315804e-28,
62
+ 2.0194839173657902e-28, 1.0097419586828951e-28, 5.0487097934144756e-29, 2.5243548967072378e-29,
63
+ 1.2621774483536189e-29, 6.3108872417680944e-30, 3.1554436208840472e-30, 1.5777218104420236e-30,
64
+ 7.8886090522101181e-31, 3.944304526105059e-31, 1.9721522630525295e-31, 9.8607613152626476e-32,
65
+ 4.9303806576313238e-32, 2.4651903288156619e-32, 1.2325951644078309e-32, 6.1629758220391547e-33,
66
+ 3.0814879110195774e-33, 1.5407439555097887e-33, 7.7037197775489434e-34, 3.8518598887744717e-34,
67
+ 1.9259299443872359e-34, 9.6296497219361793e-35, 4.8148248609680896e-35, 2.4074124304840448e-35,
68
+ 1.2037062152420224e-35, 6.018531076210112e-36, 3.009265538105056e-36, 1.504632769052528e-36,
69
+ 7.5231638452626401e-37, 3.76158192263132e-37, 1.88079096131566e-37, 9.4039548065783001e-38,
70
+ 4.70197740328915e-38, 2.350988701644575e-38, 1.1754943508222875e-38, 5.8774717541114375e-39,
71
+ 2.9387358770557188e-39, 1.4693679385278594e-39, 7.3468396926392969e-40, 3.6734198463196485e-40,
72
+ 1.8367099231598242e-40, 9.1835496157991212e-41, 4.5917748078995606e-41, 2.2958874039497803e-41,
73
+ 1.1479437019748901e-41, 5.7397185098744507e-42, 2.8698592549372254e-42, 1.4349296274686127e-42,
74
+ 7.1746481373430634e-43, 3.5873240686715317e-43, 1.7936620343357659e-43, 8.9683101716788293e-44,
75
+ 4.4841550858394146e-44, 2.2420775429197073e-44, 1.1210387714598537e-44, 5.6051938572992683e-45,
76
+ 2.8025969286496341e-45, 1.4012984643248171e-45, 7.0064923216240854e-46, 3.5032461608120427e-46,
77
+ 1.7516230804060213e-46, 8.7581154020301067e-47, 4.3790577010150533e-47, 2.1895288505075267e-47,
78
+ 1.0947644252537633e-47, 5.4738221262688167e-48, 2.7369110631344083e-48, 1.3684555315672042e-48,
79
+ 6.8422776578360209e-49, 3.4211388289180104e-49, 1.7105694144590052e-49, 8.5528470722950261e-50,
80
+ 4.276423536147513e-50, 2.1382117680737565e-50, 1.0691058840368783e-50, 5.3455294201843913e-51,
81
+ 2.6727647100921956e-51, 1.3363823550460978e-51, 6.6819117752304891e-52, 3.3409558876152446e-52,
82
+ 1.6704779438076223e-52, 8.3523897190381114e-53, 4.1761948595190557e-53, 2.0880974297595278e-53,
83
+ 1.0440487148797639e-53, 5.2202435743988196e-54, 2.6101217871994098e-54, 1.3050608935997049e-54,
84
+ 6.5253044679985245e-55, 3.2626522339992623e-55, 1.6313261169996311e-55, 8.1566305849981557e-56,
85
+ 4.0783152924990778e-56, 2.0391576462495389e-56, 1.0195788231247695e-56, 5.0978941156238473e-57,
86
+ 2.5489470578119236e-57, 1.2744735289059618e-57, 6.3723676445298091e-58, 3.1861838222649046e-58,
87
+ 1.5930919111324523e-58, 7.9654595556622614e-59, 3.9827297778311307e-59, 1.9913648889155653e-59,
88
+ 9.9568244445778267e-60, 4.9784122222889134e-60, 2.4892061111444567e-60, 1.2446030555722283e-60,
89
+ 6.2230152778611417e-61, 3.1115076389305709e-61, 1.5557538194652854e-61, 7.7787690973264271e-62,
90
+ 3.8893845486632136e-62, 1.9446922743316068e-62, 9.7234613716580339e-63, 4.861730685829017e-63,
91
+ 2.4308653429145085e-63, 1.2154326714572542e-63, 6.0771633572862712e-64, 3.0385816786431356e-64,
92
+ 1.5192908393215678e-64, 7.596454196607839e-65, 3.7982270983039195e-65, 1.8991135491519597e-65,
93
+ 9.4955677457597987e-66, 4.7477838728798994e-66, 2.3738919364399497e-66, 1.1869459682199748e-66,
94
+ 5.9347298410998742e-67, 2.9673649205499371e-67, 1.4836824602749686e-67, 7.4184123013748428e-68,
95
+ 3.7092061506874214e-68, 1.8546030753437107e-68, 9.2730153767185535e-69, 4.6365076883592767e-69,
96
+ 2.3182538441796384e-69, 1.1591269220898192e-69, 5.7956346104490959e-70, 2.897817305224548e-70,
97
+ 1.448908652612274e-70, 7.2445432630613699e-71, 3.6222716315306849e-71, 1.8111358157653425e-71,
98
+ 9.0556790788267124e-72, 4.5278395394133562e-72, 2.2639197697066781e-72, 1.131959884853339e-72,
99
+ 5.6597994242666952e-73, 2.8298997121333476e-73, 1.4149498560666738e-73, 7.074749280333369e-74,
100
+ 3.5373746401666845e-74, 1.7686873200833423e-74, 8.8434366004167113e-75, 4.4217183002083556e-75,
101
+ 2.2108591501041778e-75, 1.1054295750520889e-75, 5.5271478752604446e-76, 2.7635739376302223e-76,
102
+ 1.3817869688151111e-76, 6.9089348440755557e-77, 3.4544674220377779e-77, 1.7272337110188889e-77
103
+ };
104
+
105
+ } /* namespace datasketches */
106
+
107
+ #endif
@@ -0,0 +1,57 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _MEMORY_OPERATIONS_HPP_
21
+ #define _MEMORY_OPERATIONS_HPP_
22
+
23
+ #include <memory>
24
+ #include <exception>
25
+ #include <iostream>
26
+
27
+ namespace datasketches {
28
+
29
// Verifies that a buffer offers at least as many bytes as an operation needs.
// Returns normally when bytes_available >= min_needed; otherwise throws
// std::out_of_range with a message that reports both values.
static inline void ensure_minimum_memory(size_t bytes_available, size_t min_needed) {
  if (bytes_available >= min_needed) {
    return;
  }

  std::string message("Insufficient buffer size detected: bytes available ");
  message += std::to_string(bytes_available);
  message += ", minimum needed ";
  message += std::to_string(min_needed);
  throw std::out_of_range(message);
}
35
+
36
// Guards a memory access against the size of the underlying buffer.
// Returns normally when requested_index <= capacity; otherwise throws
// std::out_of_range with a message that reports both values.
static inline void check_memory_size(size_t requested_index, size_t capacity) {
  if (requested_index <= capacity) {
    return;
  }

  std::string message("Attempt to access memory beyond limits: requested index ");
  message += std::to_string(requested_index);
  message += ", capacity ";
  message += std::to_string(capacity);
  throw std::out_of_range(message);
}
42
+
43
// Copies `size` bytes from src to dst and returns the number of bytes copied.
// Note that `size` is a byte count, not an item count.
static inline size_t copy_from_mem(const void* src, void* dst, size_t size) {
  const size_t bytes_copied = size;
  memcpy(dst, src, bytes_copied);
  return bytes_copied;
}
48
+
49
// Copies `size` bytes from src to dst and returns the number of bytes copied.
// Note that `size` is a byte count, not an item count.
static inline size_t copy_to_mem(const void* src, void* dst, size_t size) {
  const size_t bytes_copied = size;
  memcpy(dst, src, bytes_copied);
  return bytes_copied;
}
54
+
55
+ } // namespace
56
+
57
+ #endif // _MEMORY_OPERATIONS_HPP_
@@ -0,0 +1,196 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef DATASKETCHES_SERDE_HPP_
21
+ #define DATASKETCHES_SERDE_HPP_
22
+
23
+ #include <cstring>
24
+ #include <iostream>
25
+ #include <memory>
26
+ #include <string>
27
+ #include <exception>
28
+
29
+ #include "memory_operations.hpp"
30
+
31
+ namespace datasketches {
32
+
33
// Serializer / deserializer ("serde") interface for items of type T.
// This primary template only declares the member functions; it defines none
// of them. The second template parameter defaults to void and gives
// specializations a slot for a std::enable_if-style condition.
template<typename T, typename Enable = void>
struct serde {
  // ---- stream serialization ----

  // Writes num items, starting at items, to the output stream.
  void serialize(std::ostream& os, const T* items, unsigned num) const;

  // Reads num items from the input stream into items.
  // The storage behind items is allocated but not initialized.
  void deserialize(std::istream& is, T* items, unsigned num) const;

  // ---- raw bytes serialization ----

  // Size of a single item.
  size_t size_of_item(const T& item) const;

  // Writes num items into the buffer at ptr, which holds capacity bytes.
  size_t serialize(void* ptr, size_t capacity, const T* items, unsigned num) const;

  // Reads num items from the buffer at ptr, which holds capacity bytes.
  // The storage behind items is allocated but not initialized.
  size_t deserialize(const void* ptr, size_t capacity, T* items, unsigned num) const;
};
44
+
45
+ // serde for all fixed-size arithmetic types (int and float of different sizes)
46
+ // in particular, kll_sketch<int64_t> should produce sketches binary-compatible
47
+ // with LongsSketch and ItemsSketch<Long> with ArrayOfLongsSerDe in Java
48
+ template<typename T>
49
+ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
50
+ void serialize(std::ostream& os, const T* items, unsigned num) const {
51
+ bool failure = false;
52
+ try {
53
+ os.write(reinterpret_cast<const char*>(items), sizeof(T) * num);
54
+ } catch (std::ostream::failure& e) {
55
+ failure = true;
56
+ }
57
+ if (failure || !os.good()) {
58
+ throw std::runtime_error("error writing to std::ostream with " + std::to_string(num) + " items");
59
+ }
60
+ }
61
+ void deserialize(std::istream& is, T* items, unsigned num) const {
62
+ bool failure = false;
63
+ try {
64
+ is.read((char*)items, sizeof(T) * num);
65
+ } catch (std::istream::failure& e) {
66
+ failure = true;
67
+ }
68
+ if (failure || !is.good()) {
69
+ throw std::runtime_error("error reading from std::istream with " + std::to_string(num) + " items");
70
+ }
71
+ }
72
+
73
+ size_t size_of_item(const T&) const {
74
+ return sizeof(T);
75
+ }
76
+ size_t serialize(void* ptr, size_t capacity, const T* items, unsigned num) const {
77
+ const size_t bytes_written = sizeof(T) * num;
78
+ check_memory_size(bytes_written, capacity);
79
+ memcpy(ptr, items, bytes_written);
80
+ return bytes_written;
81
+ }
82
+ size_t deserialize(const void* ptr, size_t capacity, T* items, unsigned num) const {
83
+ const size_t bytes_read = sizeof(T) * num;
84
+ check_memory_size(bytes_read, capacity);
85
+ memcpy(items, ptr, bytes_read);
86
+ return bytes_read;
87
+ }
88
+ };
89
+
90
+ // serde for std::string items
91
+ // This should produce sketches binary-compatible with
92
+ // ItemsSketch<String> with ArrayOfStringsSerDe in Java.
93
+ // The length of each string is stored as a 32-bit integer (historically),
94
+ // which may be too wasteful. Treat this as an example.
95
+ template<>
96
+ struct serde<std::string> {
97
+ void serialize(std::ostream& os, const std::string* items, unsigned num) const {
98
+ unsigned i = 0;
99
+ bool failure = false;
100
+ try {
101
+ for (; i < num && os.good(); i++) {
102
+ uint32_t length = items[i].size();
103
+ os.write((char*)&length, sizeof(length));
104
+ os.write(items[i].c_str(), length);
105
+ }
106
+ } catch (std::ostream::failure& e) {
107
+ failure = true;
108
+ }
109
+ if (failure || !os.good()) {
110
+ throw std::runtime_error("error writing to std::ostream at item " + std::to_string(i));
111
+ }
112
+ }
113
+ void deserialize(std::istream& is, std::string* items, unsigned num) const {
114
+ unsigned i = 0;
115
+ bool failure = false;
116
+ try {
117
+ for (; i < num; i++) {
118
+ uint32_t length;
119
+ is.read((char*)&length, sizeof(length));
120
+ if (!is.good()) { break; }
121
+ std::string str;
122
+ str.reserve(length);
123
+ for (uint32_t j = 0; j < length; j++) {
124
+ str.push_back(is.get());
125
+ }
126
+ if (!is.good()) { break; }
127
+ new (&items[i]) std::string(std::move(str));
128
+ }
129
+ } catch (std::istream::failure& e) {
130
+ failure = true;
131
+ }
132
+ if (failure || !is.good()) {
133
+ // clean up what we've already allocated
134
+ for (unsigned j = 0; j < i; ++j) {
135
+ items[j].~basic_string();
136
+ }
137
+ throw std::runtime_error("error reading from std::istream at item " + std::to_string(i));
138
+ }
139
+ }
140
+ size_t size_of_item(const std::string& item) const {
141
+ return sizeof(uint32_t) + item.size();
142
+ }
143
+ size_t serialize(void* ptr, size_t capacity, const std::string* items, unsigned num) const {
144
+ size_t bytes_written = 0;
145
+ for (unsigned i = 0; i < num; ++i) {
146
+ const uint32_t length = items[i].size();
147
+ const size_t new_bytes = length + sizeof(length);
148
+ check_memory_size(bytes_written + new_bytes, capacity);
149
+ memcpy(ptr, &length, sizeof(length));
150
+ ptr = static_cast<char*>(ptr) + sizeof(uint32_t);
151
+ memcpy(ptr, items[i].c_str(), length);
152
+ ptr = static_cast<char*>(ptr) + length;
153
+ bytes_written += new_bytes;
154
+ }
155
+ return bytes_written;
156
+ }
157
+ size_t deserialize(const void* ptr, size_t capacity, std::string* items, unsigned num) const {
158
+ size_t bytes_read = 0;
159
+ unsigned i = 0;
160
+ bool failure = false;
161
+ for (; i < num && !failure; ++i) {
162
+ uint32_t length;
163
+ if (bytes_read + sizeof(length) > capacity) {
164
+ bytes_read += sizeof(length); // we'll use this to report the error
165
+ failure = true;
166
+ break;
167
+ }
168
+ memcpy(&length, ptr, sizeof(length));
169
+ ptr = static_cast<const char*>(ptr) + sizeof(uint32_t);
170
+ bytes_read += sizeof(length);
171
+
172
+ if (bytes_read + length > capacity) {
173
+ bytes_read += length; // we'll use this to report the error
174
+ failure = true;
175
+ break;
176
+ }
177
+ new (&items[i]) std::string(static_cast<const char*>(ptr), length);
178
+ ptr = static_cast<const char*>(ptr) + length;
179
+ bytes_read += length;
180
+ }
181
+
182
+ if (failure) {
183
+ // clean up what we've already allocated
184
+ for (unsigned j = 0; j < i; ++j)
185
+ items[j].~basic_string();
186
+ // using this for a consistent error message
187
+ check_memory_size(bytes_read, capacity);
188
+ }
189
+
190
+ return bytes_read;
191
+ }
192
+ };
193
+
194
+ } /* namespace datasketches */
195
+
196
+ # endif