datasketches 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,62 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef CPC_COMMON_HPP_
21
+ #define CPC_COMMON_HPP_
22
+
23
+ #include <memory>
24
+
25
+ #include "MurmurHash3.h"
26
+
27
+ namespace datasketches {
28
+
29
+ static const uint8_t CPC_MIN_LG_K = 4; // presumably the smallest lg_k (log2 of k) a CPC sketch accepts - confirm against the sketch code
30
+ static const uint8_t CPC_MAX_LG_K = 26; // presumably the largest lg_k a CPC sketch accepts - confirm
31
+ static const uint8_t CPC_DEFAULT_LG_K = 11; // presumably the lg_k used when the caller does not pick one - confirm
32
+
33
+ template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>; // allocator A rebound to allocate uint8_t
34
+ template<typename A> using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>; // allocator A rebound to allocate uint16_t
35
+ template<typename A> using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>; // allocator A rebound to allocate uint32_t
36
+ template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>; // allocator A rebound to allocate uint64_t
37
+ // std::vector of fixed-width integers allocating through the rebound allocators above. NOTE(review): std::vector is used here but this header directly includes only <memory> and "MurmurHash3.h" - presumably <vector> arrives transitively; confirm
38
+ template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
39
+ template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
40
+ template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;
41
+
42
+ // forward declaration of the table type that uncompressed_state (below) stores by value
43
+ template<typename A> class u32_table; // NOTE(review): a by-value member needs the complete type when uncompressed_state<A> is instantiated - presumably the full definition is included first by the users of this header; confirm
44
+
45
+ template<typename A>
46
+ struct compressed_state { // written by cpc_compressor::compress (as target) and read by cpc_compressor::uncompress (as source)
47
+ vector_u32<A> table_data; // 32-bit words; presumably the compressed table of surprising values - confirm
48
+ uint32_t table_data_words; // presumably the number of words of table_data in use - confirm; no default initializer
49
+ uint32_t table_num_entries; // can be different from the number of entries in the sketch in hybrid mode
50
+ vector_u32<A> window_data; // 32-bit words; presumably the compressed sliding window - confirm
51
+ uint32_t window_data_words; // presumably the number of words of window_data in use - confirm; no default initializer
52
+ };
53
+
54
+ template<typename A>
55
+ struct uncompressed_state { // written by cpc_compressor::uncompress (as target)
56
+ u32_table<A> table; // held by value; u32_table is only forward-declared in this header
57
+ vector_u8<A> window; // bytes; presumably the uncompressed sliding window - confirm
58
+ };
59
+
60
+ } /* namespace datasketches */
61
+
62
+ #endif
@@ -0,0 +1,147 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ // author Kevin Lang, Oath Research
21
+
22
+ #ifndef CPC_COMPRESSOR_HPP_
23
+ #define CPC_COMPRESSOR_HPP_
24
+
25
+ #include "cpc_common.hpp"
26
+
27
+ namespace datasketches {
28
+
29
+ /*
30
+ * This is a very efficient compressor specialized for use by the CPC Sketch.
31
+ * There are two very different compression schemes here: one for the sliding window
32
+ * and another for the table of so-called surprising values.
33
+ * These two compression schemes are designed for specific probability distributions of entries
34
+ * in these data structures and make some compromises for performance. As a result
35
+ * the compression is slightly less effective than theoretically achievable but is very fast.
36
+ */
37
+
38
+ // forward declarations
39
+ template<typename A> class cpc_sketch_alloc; // the sketch type that cpc_compressor::compress takes as its source
40
+ template<typename A> class cpc_compressor; // declared early so that get_compressor (below) can name its return type
41
+
42
+ // the compressor is not instantiated directly (its constructor is private)
43
+ // the sketch implementation obtains the instance for allocator type A through this global function, which constructs it on the first use
44
+ template<typename A>
45
+ inline cpc_compressor<A>& get_compressor();
46
+
47
+ template<typename A>
48
+ class cpc_compressor { // compressor/decompressor for CPC sketch state; obtained through get_compressor<A>(), never constructed by callers
49
+ public:
50
+ void compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const; // reads the sketch (const) and writes target
51
+ void uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const; // reads source (const) and writes target; lg_k and num_coupons are supplied by the caller
52
+
53
+ // methods below are public for testing
54
+
55
+ // This returns the number of compressed words that were actually used. It is the caller's
56
+ // responsibility to ensure that the compressed_words array is long enough to prevent over-run.
57
+ size_t low_level_compress_bytes(
58
+ const uint8_t* byte_array, // input
59
+ size_t num_bytes_to_encode,
60
+ const uint16_t* encoding_table,
61
+ uint32_t* compressed_words // output
62
+ ) const;
63
+ // presumably the inverse of low_level_compress_bytes - confirm against the implementation
64
+ void low_level_uncompress_bytes(
65
+ uint8_t* byte_array, // output
66
+ size_t num_bytes_to_decode,
67
+ const uint16_t* decoding_table,
68
+ const uint32_t* compressed_words, // input
69
+ size_t num_compressed_words // input
70
+ ) const;
71
+
72
+ // Here "pairs" refers to row-column pairs that specify
73
+ // the positions of surprising values in the bit matrix.
74
+
75
+ // returns the number of compressed_words actually used; there is no output-length parameter, so presumably the caller must size the buffer (see safe_length_for_compressed_pair_buf) - confirm
76
+ size_t low_level_compress_pairs(
77
+ const uint32_t* pair_array, // input
78
+ size_t num_pairs_to_encode,
79
+ size_t num_base_bits,
80
+ uint32_t* compressed_words // output
81
+ ) const;
82
+ // presumably the inverse of low_level_compress_pairs - confirm against the implementation
83
+ void low_level_uncompress_pairs(
84
+ uint32_t* pair_array, // output
85
+ size_t num_pairs_to_decode,
86
+ size_t num_base_bits,
87
+ const uint32_t* compressed_words, // input
88
+ size_t num_compressed_words // input
89
+ ) const;
90
+
91
+ private:
92
+ // These decoding tables are created at library startup time by inverting the encoding tables
93
+ uint16_t* decoding_tables_for_high_entropy_byte[22] = {
94
+ // sixteen tables for the steady state (chosen based on the "phase" of C/K)
95
+ NULL, NULL, NULL, NULL,
96
+ NULL, NULL, NULL, NULL,
97
+ NULL, NULL, NULL, NULL,
98
+ NULL, NULL, NULL, NULL,
99
+ // six more tables for the gradual transition between warmup mode and the steady state.
100
+ NULL, NULL, NULL, NULL, NULL, NULL
101
+ };
102
+ uint16_t* length_limited_unary_decoding_table65; // NOTE(review): unlike the two pointer arrays around it, this member has no default initializer - presumably make_decoding_tables() assigns it; confirm
103
+ uint8_t* column_permutations_for_decoding[16] = {
104
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
105
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
106
+ };
107
+
108
+ cpc_compressor(); // private; its definition calls make_decoding_tables()
109
+ template<typename T> friend cpc_compressor<T>& get_compressor(); // the friend that constructs the instance
110
+ ~cpc_compressor(); // private; its definition calls free_decoding_tables()
111
+
112
+ void make_decoding_tables(); // call this at startup (the constructor does)
113
+ void free_decoding_tables(); // call this at the end (the destructor does)
114
+ // one private compress helper per sketch "flavor" named in the method
115
+ void compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
116
+ void compress_hybrid_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
117
+ void compress_pinned_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
118
+ void compress_sliding_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
119
+ // matching uncompress helpers. NOTE(review): num_coupons is uint32_t in the pinned/sliding helpers but uint64_t in the public uncompress() - confirm the narrowing is intended
120
+ void uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const;
121
+ void uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const;
122
+ void uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
123
+ void uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
124
+
125
+ uint8_t* make_inverse_permutation(const uint8_t* permu, int length); // the definition allocates the result with new[]; presumably released by free_decoding_tables() - confirm
126
+ uint16_t* make_decoding_table(const uint16_t* encoding_table, int num_byte_values); // returns a raw pointer; allocation and ownership are not visible here - confirm
127
+ void validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const;
128
+
129
+ void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const;
130
+ void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const;
131
+
132
+ vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const;
133
+ void uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
134
+ // presumably upper bounds, in 32-bit words, for sizing the compressed output buffers - confirm
135
+ static size_t safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits);
136
+ static size_t safe_length_for_compressed_window_buf(uint64_t k);
137
+ static uint8_t determine_pseudo_phase(uint8_t lg_k, uint64_t c);
138
+
139
+ static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space);
140
+ static inline uint64_t golomb_choose_number_of_base_bits(uint64_t k, uint64_t count);
141
+ };
142
+
143
+ } /* namespace datasketches */
144
+
145
+ #include "cpc_compressor_impl.hpp"
146
+
147
+ #endif
@@ -0,0 +1,742 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ // author Kevin Lang, Oath Research
21
+
22
+ #ifndef CPC_COMPRESSOR_IMPL_HPP_
23
+ #define CPC_COMPRESSOR_IMPL_HPP_
24
+
25
+ #include <memory>
26
+
27
+ #include "compression_data.hpp"
28
+ #include "cpc_util.hpp"
29
+ #include "cpc_common.hpp"
30
+ #include "count_zeros.hpp"
31
+
32
+ namespace datasketches {
33
+
34
+ // construct on first use
35
+ template<typename A>
36
+ cpc_compressor<A>& get_compressor() {
37
+ static cpc_compressor<A>* instance = new cpc_compressor<A>(); // use new for global initialization
38
+ return *instance;
39
+ }
40
+
41
+ template<typename A>
42
+ cpc_compressor<A>::cpc_compressor() {
43
+ make_decoding_tables();
44
+ }
45
+
46
+ template<typename A>
47
+ cpc_compressor<A>::~cpc_compressor() {
48
+ free_decoding_tables();
49
+ }
50
+
51
+ template<typename A>
52
+ uint8_t* cpc_compressor<A>::make_inverse_permutation(const uint8_t* permu, int length) {
53
+ uint8_t* inverse = new uint8_t[length]; // use new for global initialization
54
+ for (int i = 0; i < length; i++) {
55
+ inverse[permu[i]] = i;
56
+ }
57
+ for (int i = 0; i < length; i++) {
58
+ if (permu[inverse[i]] != i) throw std::logic_error("inverse permutation error");
59
+ }
60
+ return inverse;
61
+ }
62
+
63
+ /* Given an encoding table that maps unsigned bytes to codewords
64
+ of length at most 12, this builds a size-4096 decoding table */
65
+ // The second argument is typically 256, but can be other values such as 65.
66
+ template<typename A>
67
+ uint16_t* cpc_compressor<A>::make_decoding_table(const uint16_t* encoding_table, int num_byte_values) {
68
+ uint16_t* decoding_table = new uint16_t[4096]; // use new for global initialization
69
+ for (int byte_value = 0; byte_value < num_byte_values; byte_value++) {
70
+ const int encoding_entry = encoding_table[byte_value];
71
+ const int code_value = encoding_entry & 0xfff;
72
+ const int code_length = encoding_entry >> 12;
73
+ const int decoding_entry = (code_length << 8) | byte_value;
74
+ const int garbage_length = 12 - code_length;
75
+ const int num_copies = 1 << garbage_length;
76
+ for (int garbage_bits = 0; garbage_bits < num_copies; garbage_bits++) {
77
+ const int extended_code_value = code_value | (garbage_bits << code_length);
78
+ decoding_table[extended_code_value & 0xfff] = decoding_entry;
79
+ }
80
+ }
81
+ return decoding_table;
82
+ }
83
+
84
+ template<typename A>
85
+ void cpc_compressor<A>::validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const {
86
+ for (int decode_this = 0; decode_this < 4096; decode_this++) {
87
+ const int tmp_d = decoding_table[decode_this];
88
+ const int decoded_byte = tmp_d & 0xff;
89
+ const int decoded_length = tmp_d >> 8;
90
+
91
+ const int tmp_e = encoding_table[decoded_byte];
92
+ const int encoded_bit_pattern = tmp_e & 0xfff;
93
+ const int encoded_length = tmp_e >> 12;
94
+
95
+ if (decoded_length != encoded_length) throw std::logic_error("decoded length error");
96
+ if (encoded_bit_pattern != (decode_this & ((1 << decoded_length) - 1))) throw std::logic_error("bit pattern error");
97
+ }
98
+ }
99
+
100
+ template<typename A>
101
+ void cpc_compressor<A>::make_decoding_tables() {
102
+ length_limited_unary_decoding_table65 = make_decoding_table(length_limited_unary_encoding_table65, 65);
103
+ validate_decoding_table(
104
+ length_limited_unary_decoding_table65,
105
+ length_limited_unary_encoding_table65
106
+ );
107
+
108
+ for (int i = 0; i < (16 + 6); i++) {
109
+ decoding_tables_for_high_entropy_byte[i] = make_decoding_table(encoding_tables_for_high_entropy_byte[i], 256);
110
+ validate_decoding_table(
111
+ decoding_tables_for_high_entropy_byte[i],
112
+ encoding_tables_for_high_entropy_byte[i]
113
+ );
114
+ }
115
+
116
+ for (int i = 0; i < 16; i++) {
117
+ column_permutations_for_decoding[i] = make_inverse_permutation(column_permutations_for_encoding[i], 56);
118
+ }
119
+ }
120
+
121
+ template<typename A>
122
+ void cpc_compressor<A>::free_decoding_tables() {
123
+ delete[] length_limited_unary_decoding_table65;
124
+ for (int i = 0; i < (16 + 6); i++) {
125
+ delete[] decoding_tables_for_high_entropy_byte[i];
126
+ }
127
+ for (int i = 0; i < 16; i++) {
128
+ delete[] column_permutations_for_decoding[i];
129
+ }
130
+ }
131
+
132
+ template<typename A>
133
+ void cpc_compressor<A>::compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
134
+ switch (source.determine_flavor()) {
135
+ case cpc_sketch_alloc<A>::flavor::EMPTY:
136
+ break;
137
+ case cpc_sketch_alloc<A>::flavor::SPARSE:
138
+ compress_sparse_flavor(source, result);
139
+ if (result.window_data.size() > 0) throw std::logic_error("window is not expected");
140
+ if (result.table_data.size() == 0) throw std::logic_error("table is expected");
141
+ break;
142
+ case cpc_sketch_alloc<A>::flavor::HYBRID:
143
+ compress_hybrid_flavor(source, result);
144
+ if (result.window_data.size() > 0) throw std::logic_error("window is not expected");
145
+ if (result.table_data.size() == 0) throw std::logic_error("table is expected");
146
+ break;
147
+ case cpc_sketch_alloc<A>::flavor::PINNED:
148
+ compress_pinned_flavor(source, result);
149
+ if (result.window_data.size() == 0) throw std::logic_error("window is not expected");
150
+ break;
151
+ case cpc_sketch_alloc<A>::flavor::SLIDING:
152
+ compress_sliding_flavor(source, result);
153
+ if (result.window_data.size() == 0) throw std::logic_error("window is expected");
154
+ break;
155
+ default: throw std::logic_error("Unknown sketch flavor");
156
+ }
157
+ }
158
+
159
+ template<typename A>
160
+ void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const {
161
+ switch (cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons)) {
162
+ case cpc_sketch_alloc<A>::flavor::EMPTY:
163
+ target.table = u32_table<A>(2, 6 + lg_k);
164
+ break;
165
+ case cpc_sketch_alloc<A>::flavor::SPARSE:
166
+ uncompress_sparse_flavor(source, target, lg_k);
167
+ break;
168
+ case cpc_sketch_alloc<A>::flavor::HYBRID:
169
+ uncompress_hybrid_flavor(source, target, lg_k);
170
+ break;
171
+ case cpc_sketch_alloc<A>::flavor::PINNED:
172
+ if (source.window_data.size() == 0) throw std::logic_error("window is expected");
173
+ uncompress_pinned_flavor(source, target, lg_k, num_coupons);
174
+ break;
175
+ case cpc_sketch_alloc<A>::flavor::SLIDING:
176
+ uncompress_sliding_flavor(source, target, lg_k, num_coupons);
177
+ break;
178
+ default: std::logic_error("Unknown sketch flavor");
179
+ }
180
+ }
181
+
182
+ template<typename A>
183
+ void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
184
+ if (source.sliding_window.size() > 0) throw std::logic_error("unexpected sliding window");
185
+ vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
186
+ u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
187
+ compress_surprising_values(pairs, source.get_lg_k(), result);
188
+ }
189
+
190
+ template<typename A>
191
+ void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
192
+ if (source.window_data.size() > 0) throw std::logic_error("unexpected sliding window");
193
+ if (source.table_data.size() == 0) throw std::logic_error("table is expected");
194
+ vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k);
195
+ target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k);
196
+ }
197
+
198
+ // This is complicated because it effectively builds a Sparse version
199
+ // of a Pinned sketch before compressing it. Hence the name Hybrid.
200
+ template<typename A>
201
+ void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
202
+ if (source.sliding_window.size() == 0) throw std::logic_error("no sliding window");
203
+ if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
204
+ const size_t k = 1 << source.get_lg_k();
205
+ vector_u32<A> pairs_from_table = source.surprising_value_table.unwrapping_get_items();
206
+ if (pairs_from_table.size() > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, pairs_from_table.size());
207
+ const size_t num_pairs_from_window = source.get_num_coupons() - pairs_from_table.size(); // because the window offset is zero
208
+
209
+ vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, pairs_from_table.size());
210
+
211
+ u32_table<A>::merge(
212
+ pairs_from_table.data(), 0, pairs_from_table.size(),
213
+ all_pairs.data(), pairs_from_table.size(), num_pairs_from_window,
214
+ all_pairs.data(), 0
215
+ ); // note the overlapping subarray trick
216
+
217
+ compress_surprising_values(all_pairs, source.get_lg_k(), result);
218
+ }
219
+
220
+ template<typename A>
221
+ void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
222
+ if (source.window_data.size() > 0) throw std::logic_error("window is not expected");
223
+ if (source.table_data.size() == 0) throw std::logic_error("table is expected");
224
+ vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k);
225
+
226
+ // In the hybrid flavor, some of these pairs actually
227
+ // belong in the window, so we will separate them out,
228
+ // moving the "true" pairs to the bottom of the array.
229
+ const size_t k = 1 << lg_k;
230
+ target.window.resize(k, 0); // important: zero the memory
231
+ size_t next_true_pair = 0;
232
+ for (size_t i = 0; i < source.table_num_entries; i++) {
233
+ const uint32_t row_col = pairs[i];
234
+ if (row_col == UINT32_MAX) throw std::logic_error("empty marker is not expected");
235
+ const uint8_t col = row_col & 63;
236
+ if (col < 8) {
237
+ const size_t row = row_col >> 6;
238
+ target.window[row] |= 1 << col; // set the window bit
239
+ } else {
240
+ pairs[next_true_pair++] = row_col; // move true pair down
241
+ }
242
+ }
243
+ target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k);
244
+ }
245
+
246
+ template<typename A>
247
+ void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
248
+ compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
249
+ vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
250
+ if (pairs.size() > 0) {
251
+ // Here we subtract 8 from the column indices. Because they are stored in the low 6 bits
252
+ // of each row_col pair, and because no column index is less than 8 for a "Pinned" sketch,
253
+ // we can simply subtract 8 from the pairs themselves.
254
+
255
+ // shift the columns over by 8 positions before compressing (because of the window)
256
+ for (size_t i = 0; i < pairs.size(); i++) {
257
+ if ((pairs[i] & 63) < 8) throw std::logic_error("(pairs[i] & 63) < 8");
258
+ pairs[i] -= 8;
259
+ }
260
+
261
+ if (pairs.size() > 0) u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
262
+ compress_surprising_values(pairs, source.get_lg_k(), result);
263
+ }
264
+ }
265
+
266
+ template<typename A>
267
+ void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
268
+ if (source.window_data.size() == 0) throw std::logic_error("window is expected");
269
+ uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
270
+ const size_t num_pairs = source.table_num_entries;
271
+ if (num_pairs == 0) {
272
+ target.table = u32_table<A>(2, 6 + lg_k);
273
+ } else {
274
+ if (source.table_data.size() == 0) throw std::logic_error("table is expected");
275
+ vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k);
276
+ // undo the compressor's 8-column shift
277
+ for (size_t i = 0; i < num_pairs; i++) {
278
+ if ((pairs[i] & 63) >= 56) throw std::logic_error("(pairs[i] & 63) >= 56");
279
+ pairs[i] += 8;
280
+ }
281
+ target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
282
+ }
283
+ }
284
+
285
+ template<typename A>
286
+ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
287
+ compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
288
+ vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
289
+ if (pairs.size() > 0) {
290
+ // Here we apply a complicated transformation to the column indices, which
291
+ // changes the implied ordering of the pairs, so we must do it before sorting.
292
+
293
+ const uint8_t pseudo_phase = determine_pseudo_phase(source.get_lg_k(), source.get_num_coupons());
294
+ const uint8_t* permutation = column_permutations_for_encoding[pseudo_phase];
295
+
296
+ const uint8_t offset = source.window_offset;
297
+ if (offset > 56) throw std::out_of_range("offset out of range");
298
+
299
+ for (size_t i = 0; i < pairs.size(); i++) {
300
+ const uint32_t row_col = pairs[i];
301
+ const size_t row = row_col >> 6;
302
+ uint8_t col = row_col & 63;
303
+ // first rotate the columns into a canonical configuration: new = ((old - (offset+8)) + 64) mod 64
304
+ col = (col + 56 - offset) & 63;
305
+ if (col >= 56) throw std::out_of_range("col out of range");
306
+ // then apply the permutation
307
+ col = permutation[col];
308
+ pairs[i] = (row << 6) | col;
309
+ }
310
+
311
+ if (pairs.size() > 0) u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
312
+ compress_surprising_values(pairs, source.get_lg_k(), result);
313
+ }
314
+ }
315
+
316
+ template<typename A>
317
+ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
318
+ if (source.window_data.size() == 0) throw std::logic_error("window is expected");
319
+ uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
320
+ const size_t num_pairs = source.table_num_entries;
321
+ if (num_pairs == 0) {
322
+ target.table = u32_table<A>(2, 6 + lg_k);
323
+ } else {
324
+ if (source.table_data.size() == 0) throw std::logic_error("table is expected");
325
+ vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k);
326
+
327
+ const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
328
+ if (pseudo_phase >= 16) throw std::logic_error("pseudo phase >= 16");
329
+ const uint8_t* permutation = column_permutations_for_decoding[pseudo_phase];
330
+
331
+ uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
332
+ if (offset > 56) throw std::out_of_range("offset out of range");
333
+
334
+ for (size_t i = 0; i < num_pairs; i++) {
335
+ const uint32_t row_col = pairs[i];
336
+ const size_t row = row_col >> 6;
337
+ uint8_t col = row_col & 63;
338
+ // first undo the permutation
339
+ col = permutation[col];
340
+ // then undo the rotation: old = (new + (offset+8)) mod 64
341
+ col = (col + (offset + 8)) & 63;
342
+ pairs[i] = (row << 6) | col;
343
+ }
344
+
345
+ target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
346
+ }
347
+ }
348
+
349
+ template<typename A>
350
+ void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const {
351
+ const size_t k = 1 << lg_k;
352
+ const uint64_t num_base_bits = golomb_choose_number_of_base_bits(k + pairs.size(), pairs.size());
353
+ const uint64_t table_len = safe_length_for_compressed_pair_buf(k, pairs.size(), num_base_bits);
354
+ result.table_data.resize(table_len);
355
+
356
+ size_t csv_length = low_level_compress_pairs(pairs.data(), pairs.size(), num_base_bits, result.table_data.data());
357
+
358
+ // At this point we could free the unused portion of the compression output buffer,
359
+ // but it is not necessary if it is temporary
360
+ // Note: realloc caused strange timing spikes for lgK = 11 and 12.
361
+
362
+ result.table_data_words = csv_length;
363
+ result.table_num_entries = pairs.size();
364
+ }
365
+
366
+ template<typename A>
367
+ vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const {
368
+ const size_t k = 1 << lg_k;
369
+ vector_u32<A> pairs(num_pairs);
370
+ const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
371
+ low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
372
+ return pairs;
373
+ }
374
+
375
+ template<typename A>
376
+ void cpc_compressor<A>::compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const {
377
+ const size_t k = 1 << lg_k;
378
+ const size_t window_buf_len = safe_length_for_compressed_window_buf(k);
379
+ target.window_data.resize(window_buf_len);
380
+ const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
381
+ size_t data_words = low_level_compress_bytes(window, k, encoding_tables_for_high_entropy_byte[pseudo_phase], target.window_data.data());
382
+
383
+ // At this point we could free the unused portion of the compression output buffer,
384
+ // but it is not necessary if it is temporary
385
+ // Note: realloc caused strange timing spikes for lgK = 11 and 12.
386
+
387
+ target.window_data_words = data_words;
388
+ }
389
+
390
+ template<typename A>
391
+ void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const {
392
+ const size_t k = 1 << lg_k;
393
+ window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor)
394
+ const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
395
+ low_level_uncompress_bytes(window.data(), k, decoding_tables_for_high_entropy_byte[pseudo_phase], data, data_words);
396
+ }
397
+
398
+ template<typename A>
399
+ size_t cpc_compressor<A>::safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits) {
400
+ // Long ybits = k + numPairs; // simpler and safer UB
401
+ // The following tighter UB on ybits is based on page 198
402
+ // of the textbook "Managing Gigabytes" by Witten, Moffat, and Bell.
403
+ // Notice that if numBaseBits == 0 it coincides with (k + numPairs).
404
+ const size_t ybits = num_pairs * (1 + num_base_bits) + (k >> num_base_bits);
405
+ const size_t xbits = 12 * num_pairs;
406
+ const size_t padding = num_base_bits > 10 ? 0 : 10 - num_base_bits;
407
+ return divide_longs_rounding_up(xbits + ybits + padding, 32);
408
+ }
409
+
410
+ // Explanation of padding: we write
411
+ // 1) xdelta (huffman, provides at least 1 bit, requires 12-bit lookahead)
412
+ // 2) ydeltaGolombHi (unary, provides at least 1 bit, requires 8-bit lookahead)
413
+ // 3) ydeltaGolombLo (straight B bits).
414
+ // So the 12-bit lookahead is the tight constraint, but there are at least (2 + B) bits emitted,
415
+ // so we would be safe with max (0, 10 - B) bits of padding at the end of the bitstream.
416
+ template<typename A>
417
+ size_t cpc_compressor<A>::safe_length_for_compressed_window_buf(uint64_t k) { // measured in 32-bit words
418
+ const size_t bits = 12 * k + 11; // 11 bits of padding, due to 12-bit lookahead, with 1 bit certainly present.
419
+ return divide_longs_rounding_up(bits, 32);
420
+ }
421
+
422
+ template<typename A>
423
+ uint8_t cpc_compressor<A>::determine_pseudo_phase(uint8_t lg_k, uint64_t c) {
424
+ const size_t k = 1 << lg_k;
425
+ // This mid-range logic produces pseudo-phases. They are used to select encoding tables.
426
+ // The thresholds were chosen by hand after looking at plots of measured compression.
427
+ if (1000 * c < 2375 * k) {
428
+ if ( 4 * c < 3 * k) return 16 + 0; // mid-range table
429
+ else if ( 10 * c < 11 * k) return 16 + 1; // mid-range table
430
+ else if ( 100 * c < 132 * k) return 16 + 2; // mid-range table
431
+ else if ( 3 * c < 5 * k) return 16 + 3; // mid-range table
432
+ else if (1000 * c < 1965 * k) return 16 + 4; // mid-range table
433
+ else if (1000 * c < 2275 * k) return 16 + 5; // mid-range table
434
+ else return 6; // steady-state table employed before its actual phase
435
+ } else { // This steady-state logic produces true phases. They are used to select
436
+ // encoding tables, and also column permutations for the "Sliding" flavor.
437
+ if (lg_k < 4) throw std::logic_error("lgK < 4");
438
+ const size_t tmp = c >> (lg_k - 4);
439
+ const uint8_t phase = tmp & 15;
440
+ if (phase < 0 || phase >= 16) throw std::out_of_range("wrong phase");
441
+ return phase;
442
+ }
443
+ }
444
+
445
// If the bit buffer holds at least 32 bits, writes its low 32 bits to
// wordarr[wordindex], advances wordindex and drops those bits from the buffer.
// Does nothing otherwise.
static inline void maybe_flush_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, uint32_t* wordarr, size_t& wordindex) {
  if (bufbits < 32) return;
  wordarr[wordindex] = bitbuf & 0xffffffff;
  wordindex++;
  bitbuf >>= 32;
  bufbits -= 32;
}
452
+
453
// If the bit buffer holds fewer than minbits bits, reads wordarr[wordindex],
// places it above the bits already in the buffer and advances wordindex.
// Does nothing otherwise. No bound check is made on wordindex.
static inline void maybe_fill_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, const uint32_t* wordarr, size_t& wordindex, uint8_t minbits) {
  if (bufbits >= minbits) return;
  const uint64_t next_word = wordarr[wordindex];
  wordindex++;
  bitbuf |= next_word << bufbits;
  bufbits += 32;
}
459
+
460
+ // This returns the number of compressed words that were actually used.
461
+ // It is the caller's responsibility to ensure that the compressed_words array is long enough.
462
+ template<typename A>
463
+ size_t cpc_compressor<A>::low_level_compress_bytes(
464
+ const uint8_t* byte_array, // input
465
+ size_t num_bytes_to_encode,
466
+ const uint16_t* encoding_table,
467
+ uint32_t* compressed_words // output
468
+ ) const {
469
+ uint64_t bitbuf = 0; // bits are packed into this first, then are flushed to compressed_words
470
+ uint8_t bufbits = 0; // number of bits currently in bitbuf; must be between 0 and 31
471
+ size_t next_word_index = 0;
472
+
473
+ for (size_t byte_index = 0; byte_index < num_bytes_to_encode; byte_index++) {
474
+ const uint64_t code_info = encoding_table[byte_array[byte_index]];
475
+ const uint64_t code_val = code_info & 0xfff;
476
+ const int code_len = code_info >> 12;
477
+ bitbuf |= (code_val << bufbits);
478
+ bufbits += code_len;
479
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
480
+ }
481
+
482
+ // Pad the bitstream with 11 zero-bits so that the decompressor's 12-bit peek can't overrun its input.
483
+ bufbits += 11;
484
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
485
+
486
+ if (bufbits > 0) { // We are done encoding now, so we flush the bit buffer.
487
+ if (bufbits >= 32) throw std::logic_error("bufbits >= 32");
488
+ compressed_words[next_word_index++] = bitbuf & 0xffffffff;
489
+ bitbuf = 0; bufbits = 0; // not really necessary
490
+ }
491
+ return next_word_index;
492
+ }
493
+
494
+ template<typename A>
495
+ void cpc_compressor<A>::low_level_uncompress_bytes(
496
+ uint8_t* byte_array, // output
497
+ size_t num_bytes_to_decode,
498
+ const uint16_t* decoding_table,
499
+ const uint32_t* compressed_words, // input
500
+ size_t num_compressed_words
501
+ ) const {
502
+ size_t word_index = 0;
503
+ uint64_t bitbuf = 0;
504
+ uint8_t bufbits = 0;
505
+
506
+ if (byte_array == nullptr) throw std::logic_error("byte_array == NULL");
507
+ if (decoding_table == nullptr) throw std::logic_error("decoding_table == NULL");
508
+ if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
509
+
510
+ for (size_t byte_index = 0; byte_index < num_bytes_to_decode; byte_index++) {
511
+ maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12); // ensure 12 bits in bit buffer
512
+
513
+ const size_t peek12 = bitbuf & 0xfff; // These 12 bits will include an entire Huffman codeword.
514
+ const uint16_t lookup = decoding_table[peek12];
515
+ const uint8_t code_word_length = lookup >> 8;
516
+ const uint8_t decoded_byte = lookup & 0xff;
517
+ byte_array[byte_index] = decoded_byte;
518
+ bitbuf >>= code_word_length;
519
+ bufbits -= code_word_length;
520
+ }
521
+ // Buffer over-run should be impossible unless there is a bug.
522
+ // However, we might as well check here.
523
+ if (word_index > num_compressed_words) throw std::logic_error("word_index > num_compressed_words");
524
+ }
525
+
526
// Forward declarations of the unary-code helpers defined further down in this file.
// Both take the word index and the bit buffer state by reference and update them.

// Reads one unary codeword from the bit stream and returns its value.
static inline uint64_t read_unary(
  const uint32_t* compressed_words,
  size_t& next_word_index,
  uint64_t& bitbuf,
  uint8_t& bufbits
);

// Writes `value` to the bit stream as a unary codeword.
static inline void write_unary(
  uint32_t* compressed_words,
  size_t& next_word_index,
  uint64_t& bitbuf,
  uint8_t& bufbits,
  uint64_t value
);
540
+
541
+ // Here "pairs" refers to row/column pairs that specify
542
+ // the positions of surprising values in the bit matrix.
543
+
544
+ // returns the number of compressed_words actually used
545
+ template<typename A>
546
+ size_t cpc_compressor<A>::low_level_compress_pairs(
547
+ const uint32_t* pair_array, // input
548
+ size_t num_pairs_to_encode,
549
+ size_t num_base_bits,
550
+ uint32_t* compressed_words // output
551
+ ) const {
552
+ uint64_t bitbuf = 0;
553
+ uint8_t bufbits = 0;
554
+ size_t next_word_index = 0;
555
+ const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
556
+ uint64_t predicted_row_index = 0;
557
+ uint16_t predicted_col_index = 0;
558
+
559
+ for (size_t pair_index = 0; pair_index < num_pairs_to_encode; pair_index++) {
560
+ const uint32_t row_col = pair_array[pair_index];
561
+ const uint64_t row_index = row_col >> 6;
562
+ const uint16_t col_index = row_col & 63;
563
+
564
+ if (row_index != predicted_row_index) predicted_col_index = 0;
565
+
566
+ if (row_index < predicted_row_index) throw std::logic_error("row_index < predicted_row_index");
567
+ if (col_index < predicted_col_index) throw std::logic_error("col_index < predicted_col_index");
568
+
569
+ const uint64_t y_delta = row_index - predicted_row_index;
570
+ const uint16_t x_delta = col_index - predicted_col_index;
571
+
572
+ predicted_row_index = row_index;
573
+ predicted_col_index = col_index + 1;
574
+
575
+ const uint64_t code_info = length_limited_unary_encoding_table65[x_delta];
576
+ const uint64_t code_val = code_info & 0xfff;
577
+ const uint8_t code_len = code_info >> 12;
578
+ bitbuf |= code_val << bufbits;
579
+ bufbits += code_len;
580
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
581
+
582
+ const uint64_t golomb_lo = y_delta & golomb_lo_mask;
583
+ const uint64_t golomb_hi = y_delta >> num_base_bits;
584
+
585
+ write_unary(compressed_words, next_word_index, bitbuf, bufbits, golomb_hi);
586
+
587
+ bitbuf |= golomb_lo << bufbits;
588
+ bufbits += num_base_bits;
589
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
590
+ }
591
+
592
+ // Pad the bitstream so that the decompressor's 12-bit peek can't overrun its input.
593
+ const uint8_t padding = (num_base_bits > 10) ? 0 : 10 - num_base_bits;
594
+ bufbits += padding;
595
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
596
+
597
+ if (bufbits > 0) { // We are done encoding now, so we flush the bit buffer
598
+ if (bufbits >= 32) throw std::logic_error("bufbits >= 32");
599
+ compressed_words[next_word_index++] = bitbuf & 0xffffffff;
600
+ bitbuf = 0; bufbits = 0; // not really necessary
601
+ }
602
+
603
+ return next_word_index;
604
+ }
605
+
606
+ template<typename A>
607
+ void cpc_compressor<A>::low_level_uncompress_pairs(
608
+ uint32_t* pair_array, // output
609
+ size_t num_pairs_to_decode,
610
+ size_t num_base_bits,
611
+ const uint32_t* compressed_words, // input
612
+ size_t num_compressed_words
613
+ ) const {
614
+ size_t word_index = 0;
615
+ uint64_t bitbuf = 0;
616
+ uint8_t bufbits = 0;
617
+ const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
618
+ uint64_t predicted_row_index = 0;
619
+ uint16_t predicted_col_index = 0;
620
+
621
+ // for each pair we need to read:
622
+ // x_delta (12-bit length-limited unary)
623
+ // y_delta_hi (unary)
624
+ // y_delta_lo (basebits)
625
+
626
+ for (size_t pair_index = 0; pair_index < num_pairs_to_decode; pair_index++) {
627
+ maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12); // ensure 12 bits in bit buffer
628
+ const size_t peek12 = bitbuf & 0xfff;
629
+ const uint16_t lookup = length_limited_unary_decoding_table65[peek12];
630
+ const int code_word_length = lookup >> 8;
631
+ const int16_t x_delta = lookup & 0xff;
632
+ bitbuf >>= code_word_length;
633
+ bufbits -= code_word_length;
634
+
635
+ const uint64_t golomb_hi = read_unary(compressed_words, word_index, bitbuf, bufbits);
636
+
637
+ maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, num_base_bits); // ensure num_base_bits in bit buffer
638
+ const uint64_t golomb_lo = bitbuf & golomb_lo_mask;
639
+ bitbuf >>= num_base_bits;
640
+ bufbits -= num_base_bits;
641
+ const int64_t y_delta = (golomb_hi << num_base_bits) | golomb_lo;
642
+
643
+ // Now that we have x_delta and y_delta, we can compute the pair's row and column
644
+ if (y_delta > 0) predicted_col_index = 0;
645
+ const uint64_t row_index = predicted_row_index + y_delta;
646
+ const uint16_t col_index = predicted_col_index + x_delta;
647
+ const uint32_t row_col = (row_index << 6) | col_index;
648
+ pair_array[pair_index] = row_col;
649
+ predicted_row_index = row_index;
650
+ predicted_col_index = col_index + 1;
651
+ }
652
+ if (word_index > num_compressed_words) throw std::logic_error("word_index > num_compressed_words"); // check for buffer over-run
653
+ }
654
+
655
+ uint64_t read_unary(
656
+ const uint32_t* compressed_words,
657
+ size_t& next_word_index,
658
+ uint64_t& bitbuf,
659
+ uint8_t& bufbits
660
+ ) {
661
+ if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
662
+ size_t subtotal = 0;
663
+ while (true) {
664
+ maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, next_word_index, 8); // ensure 8 bits in bit buffer
665
+
666
+ const uint8_t peek8 = bitbuf & 0xff; // These 8 bits include either all or part of the Unary codeword
667
+ const uint8_t trailing_zeros = byte_trailing_zeros_table[peek8];
668
+
669
+ if (trailing_zeros > 8) throw std::out_of_range("trailing_zeros out of range");
670
+ if (trailing_zeros < 8) {
671
+ bufbits -= 1 + trailing_zeros;
672
+ bitbuf >>= 1 + trailing_zeros;
673
+ return subtotal + trailing_zeros;
674
+ }
675
+ // The codeword was partial, so read some more
676
+ subtotal += 8;
677
+ bufbits -= 8;
678
+ bitbuf >>= 8;
679
+ }
680
+ }
681
+
682
+ void write_unary(
683
+ uint32_t* compressed_words,
684
+ size_t& next_word_index,
685
+ uint64_t& bitbuf,
686
+ uint8_t& bufbits,
687
+ uint64_t value
688
+ ) {
689
+ if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
690
+ if (bufbits > 31) throw std::out_of_range("bufbits out of range");
691
+
692
+ uint64_t remaining = value;
693
+
694
+ while (remaining >= 16) {
695
+ remaining -= 16;
696
+ // Here we output 16 zeros, but we don't need to physically write them into bitbuf
697
+ // because it already contains zeros in that region.
698
+ bufbits += 16; // Record the fact that 16 bits of output have occurred.
699
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
700
+ }
701
+
702
+ if (remaining > 15) throw std::out_of_range("remaining out of range");
703
+
704
+ const uint64_t the_unary_code = 1 << remaining;
705
+ bitbuf |= the_unary_code << bufbits;
706
+ bufbits += 1 + remaining;
707
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
708
+ }
709
+
710
+ // The empty space that this leaves at the beginning of the output array
711
+ // will be filled in later by the caller.
712
+ template<typename A>
713
+ vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space) {
714
+ const size_t output_length = empty_space + num_pairs_to_get;
715
+ vector_u32<A> pairs(output_length);
716
+ size_t pair_index = empty_space;
717
+ for (unsigned row_index = 0; row_index < k; row_index++) {
718
+ uint8_t byte = window[row_index];
719
+ while (byte != 0) {
720
+ const uint8_t col_index = byte_trailing_zeros_table[byte];
721
+ byte = byte ^ (1 << col_index); // erase the 1
722
+ pairs[pair_index++] = (row_index << 6) | col_index;
723
+ }
724
+ }
725
+ if (pair_index != output_length) throw std::logic_error("pair_index != output_length");
726
+ return pairs;
727
+ }
728
+
729
+ // returns an integer that is between
730
+ // zero and ceiling(log_2(k)) - 1, inclusive
731
+ template<typename A>
732
+ uint64_t cpc_compressor<A>::golomb_choose_number_of_base_bits(uint64_t k, uint64_t count) {
733
+ if (k < 1) throw std::invalid_argument("golomb_choose_number_of_base_bits: k < 1");
734
+ if (count < 1) throw std::invalid_argument("golomb_choose_number_of_base_bits: count < 1");
735
+ const uint64_t quotient = (k - count) / count; // integer division
736
+ if (quotient == 0) return 0;
737
+ else return long_floor_log2_of_long(quotient);
738
+ }
739
+
740
+ } /* namespace datasketches */
741
+
742
+ #endif