datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,62 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef CPC_COMMON_HPP_
21
+ #define CPC_COMMON_HPP_
22
+
23
+ #include <memory>
24
+
25
+ #include "MurmurHash3.h"
26
+
27
+ namespace datasketches {
28
+
29
+ static const uint8_t CPC_MIN_LG_K = 4; // lower limit for lg_k (presumably log2 of the sketch size k - confirm against the sketch implementation)
30
+ static const uint8_t CPC_MAX_LG_K = 26; // upper limit for lg_k
31
+ static const uint8_t CPC_DEFAULT_LG_K = 11; // lg_k to use when none is given (assumed from the name; the consumer is not in this header)
32
+
33
+ template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>; // allocator type A rebound to allocate uint8_t
34
+ template<typename A> using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>; // same rebinding for uint16_t (no matching vector alias is declared in this header)
35
+ template<typename A> using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>; // same rebinding for uint32_t
36
+ template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>; // same rebinding for uint64_t
37
+
38
+ template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>; // std::vector of uint8_t allocating through AllocU8<A>; NOTE(review): <vector> is not included directly by this header - presumably transitive, confirm
39
+ template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>; // std::vector of uint32_t allocating through AllocU32<A>
40
+ template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>; // std::vector of uint64_t allocating through AllocU64<A>
41
+
42
+ // forward declaration only: the definition is not in this header, yet uncompressed_state holds a u32_table<A> by value, so the full type must be visible wherever that struct is instantiated
43
+ template<typename A> class u32_table;
44
+
45
+ template<typename A> // Compressed form of a sketch: the output parameter of cpc_compressor::compress() and the input of cpc_compressor::uncompress()
46
+ struct compressed_state { // NOTE: the three uint32_t members have no initializers, so they are indeterminate after default-initialization
47
+ vector_u32<A> table_data; // 32-bit words for the table part (presumably the compressed table of surprising values - confirm)
48
+ uint32_t table_data_words; // presumably the number of words of table_data in use - confirm
49
+ uint32_t table_num_entries; // can be different from the number of entries in the sketch in hybrid mode
50
+ vector_u32<A> window_data; // 32-bit words for the window part (presumably the compressed sliding window - confirm)
51
+ uint32_t window_data_words; // presumably the number of words of window_data in use - confirm
52
+ };
53
+
54
+ template<typename A> // Uncompressed counterpart of compressed_state: the output parameter of cpc_compressor::uncompress()
55
+ struct uncompressed_state {
56
+ u32_table<A> table; // held by value, so u32_table<A> (only forward-declared in this header) must be complete wherever this struct is instantiated
57
+ vector_u8<A> window; // byte vector for the window part (presumably the sliding window - confirm)
58
+ };
59
+
60
+ } /* namespace datasketches */
61
+
62
+ #endif
@@ -0,0 +1,147 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ // author Kevin Lang, Oath Research
21
+
22
+ #ifndef CPC_COMPRESSOR_HPP_
23
+ #define CPC_COMPRESSOR_HPP_
24
+
25
+ #include "cpc_common.hpp"
26
+
27
+ namespace datasketches {
28
+
29
+ /*
30
+ * This is a very efficient compressor specialized for use by the CPC Sketch.
31
+ * There are two very different compression schemes here: one for the sliding window
32
+ * and another for the table of so-called surprising values.
33
+ * These two compression schemes are designed for specific probability distributions of entries
34
+ * in these data structures and make some compromises for performance. As a result
35
+ * the compression is slightly less effective than theoretically achievable but is very fast.
36
+ */
37
+
38
+ // forward declarations: only the names are needed by the declarations that follow in this header
39
+ template<typename A> class cpc_sketch_alloc; // taken by const reference as the source of compress()
40
+ template<typename A> class cpc_compressor; // declared early so get_compressor() can name it as its return type
41
+
42
+ // the compressor is not instantiated directly: its constructor is private and this function is declared a friend of the class
43
+ // the sketch implementation uses this global function to obtain the compressor, which is allocated and constructed on the first use
44
+ template<typename A>
45
+ inline cpc_compressor<A>& get_compressor(); // returns a reference to the cpc_compressor<A> instance for allocator type A
46
+
47
+ template<typename A> // A is the allocator type used by the sketch and by the vector aliases from cpc_common.hpp
48
+ class cpc_compressor { // obtained through get_compressor<A>(); constructor and destructor are private
49
+ public:
50
+ void compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const; // reads the sketch `source`, writes the result into `target`
51
+ void uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const; // inverse direction; NOTE(review): num_coupons is uint64_t here but uint32_t in the pinned/sliding helpers below - confirm the narrowing is intended
52
+
53
+ // methods below are public for testing
54
+
55
+ // This returns the number of compressed words that were actually used. It is the caller's
56
+ // responsibility to ensure that the compressed_words array is long enough to prevent over-run.
57
+ size_t low_level_compress_bytes(
58
+ const uint8_t* byte_array, // input
59
+ size_t num_bytes_to_encode, // how many bytes of byte_array to encode
60
+ const uint16_t* encoding_table, // input: table used to encode each byte (layout not visible in this header)
61
+ uint32_t* compressed_words // output
62
+ ) const;
63
+
64
+ void low_level_uncompress_bytes(
65
+ uint8_t* byte_array, // output
66
+ size_t num_bytes_to_decode, // how many bytes to produce in byte_array
67
+ const uint16_t* decoding_table, // input: table used to decode (layout not visible in this header)
68
+ const uint32_t* compressed_words, // input
69
+ size_t num_compressed_words // input
70
+ ) const;
71
+
72
+ // Here "pairs" refers to row-column pairs that specify
73
+ // the positions of surprising values in the bit matrix.
74
+
75
+ // returns the number of compressed_words actually used
76
+ size_t low_level_compress_pairs(
77
+ const uint32_t* pair_array, // input
78
+ size_t num_pairs_to_encode, // how many entries of pair_array to encode
79
+ size_t num_base_bits, // presumably the Golomb base-bit count, see golomb_choose_number_of_base_bits() - confirm
80
+ uint32_t* compressed_words // output
81
+ ) const;
82
+
83
+ void low_level_uncompress_pairs(
84
+ uint32_t* pair_array, // output
85
+ size_t num_pairs_to_decode, // how many pairs to produce in pair_array
86
+ size_t num_base_bits, // presumably must match the value used when compressing - confirm
87
+ const uint32_t* compressed_words, // input
88
+ size_t num_compressed_words // input
89
+ ) const;
90
+
91
+ private:
92
+ // These decoding tables are created by inverting the encoding tables; the original note says "at library startup time", but the constructor calls make_decoding_tables() and the instance is constructed on first use of get_compressor()
93
+ uint16_t* decoding_tables_for_high_entropy_byte[22] = {
94
+ // sixteen tables for the steady state (chosen based on the "phase" of C/K)
95
+ NULL, NULL, NULL, NULL,
96
+ NULL, NULL, NULL, NULL,
97
+ NULL, NULL, NULL, NULL,
98
+ NULL, NULL, NULL, NULL,
99
+ // six more tables for the gradual transition between warmup mode and the steady state.
100
+ NULL, NULL, NULL, NULL, NULL, NULL
101
+ };
102
+ uint16_t* length_limited_unary_decoding_table65; // NOTE(review): unlike the two arrays, this pointer has no in-class initializer; presumably assigned by make_decoding_tables() - confirm
103
+ uint8_t* column_permutations_for_decoding[16] = {
104
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
105
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
106
+ };
107
+
108
+ cpc_compressor(); // private: only the friend get_compressor() can construct an instance
109
+ template<typename T> friend cpc_compressor<T>& get_compressor(); // befriends every specialization of get_compressor
110
+ ~cpc_compressor(); // private as well, so code outside the class and its friend cannot destroy an instance
111
+
112
+ void make_decoding_tables(); // call this at startup
113
+ void free_decoding_tables(); // call this at the end
114
+
115
+ void compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const; // one compress_* helper per sketch flavor; presumably dispatched from compress() - confirm
116
+ void compress_hybrid_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
117
+ void compress_pinned_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
118
+ void compress_sliding_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
119
+
120
+ void uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const; // one uncompress_* helper per flavor; presumably dispatched from uncompress() - confirm
121
+ void uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const;
122
+ void uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const; // num_coupons is 32-bit here
123
+ void uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const; // num_coupons is 32-bit here
124
+
125
+ uint8_t* make_inverse_permutation(const uint8_t* permu, int length); // returns an array allocated with new[] in the implementation; the caller owns it
126
+ uint16_t* make_decoding_table(const uint16_t* encoding_table, int num_byte_values); // presumably also returns a heap array owned by the caller - confirm in the implementation
127
+ void validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const; // checks a decoding table against the encoding table it was built from (failure behavior not visible here)
128
+
129
+ void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const; // table part: pairs in, `result` written
130
+ void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const; // window part: bytes in, `target` written
131
+
132
+ vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const; // returns the decoded pairs by value
133
+ void uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const; // decoded bytes go into `window`
134
+
135
+ static size_t safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits); // presumably an upper bound, in 32-bit words, for a compressed pair buffer - confirm
136
+ static size_t safe_length_for_compressed_window_buf(uint64_t k); // presumably an upper bound, in 32-bit words, for a compressed window buffer - confirm
137
+ static uint8_t determine_pseudo_phase(uint8_t lg_k, uint64_t c); // presumably selects one of the 22 high-entropy-byte tables - confirm
138
+
139
+ static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space); // returns pairs by value; meaning of empty_space is not visible in this header
140
+ static inline uint64_t golomb_choose_number_of_base_bits(uint64_t k, uint64_t count); // presumably yields the num_base_bits passed to the pair routines - confirm
141
+ };
142
+
143
+ } /* namespace datasketches */
144
+
145
+ #include "cpc_compressor_impl.hpp"
146
+
147
+ #endif
@@ -0,0 +1,742 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ // author Kevin Lang, Oath Research
21
+
22
+ #ifndef CPC_COMPRESSOR_IMPL_HPP_
23
+ #define CPC_COMPRESSOR_IMPL_HPP_
24
+
25
+ #include <memory>
26
+
27
+ #include "compression_data.hpp"
28
+ #include "cpc_util.hpp"
29
+ #include "cpc_common.hpp"
30
+ #include "count_zeros.hpp"
31
+
32
+ namespace datasketches {
33
+
34
+ // construct on first use
35
+ template<typename A>
36
+ cpc_compressor<A>& get_compressor() {
37
+ static cpc_compressor<A>* instance = new cpc_compressor<A>(); // use new for global initialization
38
+ return *instance;
39
+ }
40
+
41
+ template<typename A>
42
+ cpc_compressor<A>::cpc_compressor() {
43
+ make_decoding_tables();
44
+ }
45
+
46
+ template<typename A>
47
+ cpc_compressor<A>::~cpc_compressor() {
48
+ free_decoding_tables();
49
+ }
50
+
51
+ template<typename A>
52
+ uint8_t* cpc_compressor<A>::make_inverse_permutation(const uint8_t* permu, int length) {
53
+ uint8_t* inverse = new uint8_t[length]; // use new for global initialization
54
+ for (int i = 0; i < length; i++) {
55
+ inverse[permu[i]] = i;
56
+ }
57
+ for (int i = 0; i < length; i++) {
58
+ if (permu[inverse[i]] != i) throw std::logic_error("inverse permutation error");
59
+ }
60
+ return inverse;
61
+ }
62
+
63
+ /* Given an encoding table that maps unsigned bytes to codewords
64
+ of length at most 12, this builds a size-4096 decoding table */
65
+ // The second argument is typically 256, but can be other values such as 65.
66
+ template<typename A>
67
+ uint16_t* cpc_compressor<A>::make_decoding_table(const uint16_t* encoding_table, int num_byte_values) {
68
+ uint16_t* decoding_table = new uint16_t[4096]; // use new for global initialization
69
+ for (int byte_value = 0; byte_value < num_byte_values; byte_value++) {
70
+ const int encoding_entry = encoding_table[byte_value];
71
+ const int code_value = encoding_entry & 0xfff;
72
+ const int code_length = encoding_entry >> 12;
73
+ const int decoding_entry = (code_length << 8) | byte_value;
74
+ const int garbage_length = 12 - code_length;
75
+ const int num_copies = 1 << garbage_length;
76
+ for (int garbage_bits = 0; garbage_bits < num_copies; garbage_bits++) {
77
+ const int extended_code_value = code_value | (garbage_bits << code_length);
78
+ decoding_table[extended_code_value & 0xfff] = decoding_entry;
79
+ }
80
+ }
81
+ return decoding_table;
82
+ }
83
+
84
+ template<typename A>
85
+ void cpc_compressor<A>::validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const {
86
+ for (int decode_this = 0; decode_this < 4096; decode_this++) {
87
+ const int tmp_d = decoding_table[decode_this];
88
+ const int decoded_byte = tmp_d & 0xff;
89
+ const int decoded_length = tmp_d >> 8;
90
+
91
+ const int tmp_e = encoding_table[decoded_byte];
92
+ const int encoded_bit_pattern = tmp_e & 0xfff;
93
+ const int encoded_length = tmp_e >> 12;
94
+
95
+ if (decoded_length != encoded_length) throw std::logic_error("decoded length error");
96
+ if (encoded_bit_pattern != (decode_this & ((1 << decoded_length) - 1))) throw std::logic_error("bit pattern error");
97
+ }
98
+ }
99
+
100
+ template<typename A>
101
+ void cpc_compressor<A>::make_decoding_tables() {
102
+ length_limited_unary_decoding_table65 = make_decoding_table(length_limited_unary_encoding_table65, 65);
103
+ validate_decoding_table(
104
+ length_limited_unary_decoding_table65,
105
+ length_limited_unary_encoding_table65
106
+ );
107
+
108
+ for (int i = 0; i < (16 + 6); i++) {
109
+ decoding_tables_for_high_entropy_byte[i] = make_decoding_table(encoding_tables_for_high_entropy_byte[i], 256);
110
+ validate_decoding_table(
111
+ decoding_tables_for_high_entropy_byte[i],
112
+ encoding_tables_for_high_entropy_byte[i]
113
+ );
114
+ }
115
+
116
+ for (int i = 0; i < 16; i++) {
117
+ column_permutations_for_decoding[i] = make_inverse_permutation(column_permutations_for_encoding[i], 56);
118
+ }
119
+ }
120
+
121
+ template<typename A>
122
+ void cpc_compressor<A>::free_decoding_tables() {
123
+ delete[] length_limited_unary_decoding_table65;
124
+ for (int i = 0; i < (16 + 6); i++) {
125
+ delete[] decoding_tables_for_high_entropy_byte[i];
126
+ }
127
+ for (int i = 0; i < 16; i++) {
128
+ delete[] column_permutations_for_decoding[i];
129
+ }
130
+ }
131
+
132
+ template<typename A>
133
+ void cpc_compressor<A>::compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
134
+ switch (source.determine_flavor()) {
135
+ case cpc_sketch_alloc<A>::flavor::EMPTY:
136
+ break;
137
+ case cpc_sketch_alloc<A>::flavor::SPARSE:
138
+ compress_sparse_flavor(source, result);
139
+ if (result.window_data.size() > 0) throw std::logic_error("window is not expected");
140
+ if (result.table_data.size() == 0) throw std::logic_error("table is expected");
141
+ break;
142
+ case cpc_sketch_alloc<A>::flavor::HYBRID:
143
+ compress_hybrid_flavor(source, result);
144
+ if (result.window_data.size() > 0) throw std::logic_error("window is not expected");
145
+ if (result.table_data.size() == 0) throw std::logic_error("table is expected");
146
+ break;
147
+ case cpc_sketch_alloc<A>::flavor::PINNED:
148
+ compress_pinned_flavor(source, result);
149
+ if (result.window_data.size() == 0) throw std::logic_error("window is not expected");
150
+ break;
151
+ case cpc_sketch_alloc<A>::flavor::SLIDING:
152
+ compress_sliding_flavor(source, result);
153
+ if (result.window_data.size() == 0) throw std::logic_error("window is expected");
154
+ break;
155
+ default: throw std::logic_error("Unknown sketch flavor");
156
+ }
157
+ }
158
+
159
+ template<typename A>
160
+ void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const {
161
+ switch (cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons)) {
162
+ case cpc_sketch_alloc<A>::flavor::EMPTY:
163
+ target.table = u32_table<A>(2, 6 + lg_k);
164
+ break;
165
+ case cpc_sketch_alloc<A>::flavor::SPARSE:
166
+ uncompress_sparse_flavor(source, target, lg_k);
167
+ break;
168
+ case cpc_sketch_alloc<A>::flavor::HYBRID:
169
+ uncompress_hybrid_flavor(source, target, lg_k);
170
+ break;
171
+ case cpc_sketch_alloc<A>::flavor::PINNED:
172
+ if (source.window_data.size() == 0) throw std::logic_error("window is expected");
173
+ uncompress_pinned_flavor(source, target, lg_k, num_coupons);
174
+ break;
175
+ case cpc_sketch_alloc<A>::flavor::SLIDING:
176
+ uncompress_sliding_flavor(source, target, lg_k, num_coupons);
177
+ break;
178
+ default: std::logic_error("Unknown sketch flavor");
179
+ }
180
+ }
181
+
182
+ template<typename A>
183
+ void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
184
+ if (source.sliding_window.size() > 0) throw std::logic_error("unexpected sliding window");
185
+ vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
186
+ u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
187
+ compress_surprising_values(pairs, source.get_lg_k(), result);
188
+ }
189
+
190
+ template<typename A>
191
+ void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
192
+ if (source.window_data.size() > 0) throw std::logic_error("unexpected sliding window");
193
+ if (source.table_data.size() == 0) throw std::logic_error("table is expected");
194
+ vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k);
195
+ target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k);
196
+ }
197
+
198
+ // This is complicated because it effectively builds a Sparse version
199
+ // of a Pinned sketch before compressing it. Hence the name Hybrid.
200
+ template<typename A>
201
+ void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
202
+ if (source.sliding_window.size() == 0) throw std::logic_error("no sliding window");
203
+ if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
204
+ const size_t k = 1 << source.get_lg_k();
205
+ vector_u32<A> pairs_from_table = source.surprising_value_table.unwrapping_get_items();
206
+ if (pairs_from_table.size() > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, pairs_from_table.size());
207
+ const size_t num_pairs_from_window = source.get_num_coupons() - pairs_from_table.size(); // because the window offset is zero
208
+
209
+ vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, pairs_from_table.size());
210
+
211
+ u32_table<A>::merge(
212
+ pairs_from_table.data(), 0, pairs_from_table.size(),
213
+ all_pairs.data(), pairs_from_table.size(), num_pairs_from_window,
214
+ all_pairs.data(), 0
215
+ ); // note the overlapping subarray trick
216
+
217
+ compress_surprising_values(all_pairs, source.get_lg_k(), result);
218
+ }
219
+
220
+ template<typename A>
221
+ void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
222
+ if (source.window_data.size() > 0) throw std::logic_error("window is not expected");
223
+ if (source.table_data.size() == 0) throw std::logic_error("table is expected");
224
+ vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries, lg_k);
225
+
226
+ // In the hybrid flavor, some of these pairs actually
227
+ // belong in the window, so we will separate them out,
228
+ // moving the "true" pairs to the bottom of the array.
229
+ const size_t k = 1 << lg_k;
230
+ target.window.resize(k, 0); // important: zero the memory
231
+ size_t next_true_pair = 0;
232
+ for (size_t i = 0; i < source.table_num_entries; i++) {
233
+ const uint32_t row_col = pairs[i];
234
+ if (row_col == UINT32_MAX) throw std::logic_error("empty marker is not expected");
235
+ const uint8_t col = row_col & 63;
236
+ if (col < 8) {
237
+ const size_t row = row_col >> 6;
238
+ target.window[row] |= 1 << col; // set the window bit
239
+ } else {
240
+ pairs[next_true_pair++] = row_col; // move true pair down
241
+ }
242
+ }
243
+ target.table = u32_table<A>::make_from_pairs(pairs.data(), next_true_pair, lg_k);
244
+ }
245
+
246
+ template<typename A>
247
+ void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
248
+ compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
249
+ vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
250
+ if (pairs.size() > 0) {
251
+ // Here we subtract 8 from the column indices. Because they are stored in the low 6 bits
252
+ // of each row_col pair, and because no column index is less than 8 for a "Pinned" sketch,
253
+ // we can simply subtract 8 from the pairs themselves.
254
+
255
+ // shift the columns over by 8 positions before compressing (because of the window)
256
+ for (size_t i = 0; i < pairs.size(); i++) {
257
+ if ((pairs[i] & 63) < 8) throw std::logic_error("(pairs[i] & 63) < 8");
258
+ pairs[i] -= 8;
259
+ }
260
+
261
+ if (pairs.size() > 0) u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
262
+ compress_surprising_values(pairs, source.get_lg_k(), result);
263
+ }
264
+ }
265
+
266
+ template<typename A>
267
+ void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
268
+ if (source.window_data.size() == 0) throw std::logic_error("window is expected");
269
+ uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
270
+ const size_t num_pairs = source.table_num_entries;
271
+ if (num_pairs == 0) {
272
+ target.table = u32_table<A>(2, 6 + lg_k);
273
+ } else {
274
+ if (source.table_data.size() == 0) throw std::logic_error("table is expected");
275
+ vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k);
276
+ // undo the compressor's 8-column shift
277
+ for (size_t i = 0; i < num_pairs; i++) {
278
+ if ((pairs[i] & 63) >= 56) throw std::logic_error("(pairs[i] & 63) >= 56");
279
+ pairs[i] += 8;
280
+ }
281
+ target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
282
+ }
283
+ }
284
+
285
+ template<typename A>
286
+ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
287
+ compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
288
+ vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
289
+ if (pairs.size() > 0) {
290
+ // Here we apply a complicated transformation to the column indices, which
291
+ // changes the implied ordering of the pairs, so we must do it before sorting.
292
+
293
+ const uint8_t pseudo_phase = determine_pseudo_phase(source.get_lg_k(), source.get_num_coupons());
294
+ const uint8_t* permutation = column_permutations_for_encoding[pseudo_phase];
295
+
296
+ const uint8_t offset = source.window_offset;
297
+ if (offset > 56) throw std::out_of_range("offset out of range");
298
+
299
+ for (size_t i = 0; i < pairs.size(); i++) {
300
+ const uint32_t row_col = pairs[i];
301
+ const size_t row = row_col >> 6;
302
+ uint8_t col = row_col & 63;
303
+ // first rotate the columns into a canonical configuration: new = ((old - (offset+8)) + 64) mod 64
304
+ col = (col + 56 - offset) & 63;
305
+ if (col >= 56) throw std::out_of_range("col out of range");
306
+ // then apply the permutation
307
+ col = permutation[col];
308
+ pairs[i] = (row << 6) | col;
309
+ }
310
+
311
+ if (pairs.size() > 0) u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
312
+ compress_surprising_values(pairs, source.get_lg_k(), result);
313
+ }
314
+ }
315
+
316
+ template<typename A>
317
+ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const {
318
+ if (source.window_data.size() == 0) throw std::logic_error("window is expected");
319
+ uncompress_sliding_window(source.window_data.data(), source.window_data_words, target.window, lg_k, num_coupons);
320
+ const size_t num_pairs = source.table_num_entries;
321
+ if (num_pairs == 0) {
322
+ target.table = u32_table<A>(2, 6 + lg_k);
323
+ } else {
324
+ if (source.table_data.size() == 0) throw std::logic_error("table is expected");
325
+ vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs, lg_k);
326
+
327
+ const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
328
+ if (pseudo_phase >= 16) throw std::logic_error("pseudo phase >= 16");
329
+ const uint8_t* permutation = column_permutations_for_decoding[pseudo_phase];
330
+
331
+ uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
332
+ if (offset > 56) throw std::out_of_range("offset out of range");
333
+
334
+ for (size_t i = 0; i < num_pairs; i++) {
335
+ const uint32_t row_col = pairs[i];
336
+ const size_t row = row_col >> 6;
337
+ uint8_t col = row_col & 63;
338
+ // first undo the permutation
339
+ col = permutation[col];
340
+ // then undo the rotation: old = (new + (offset+8)) mod 64
341
+ col = (col + (offset + 8)) & 63;
342
+ pairs[i] = (row << 6) | col;
343
+ }
344
+
345
+ target.table = u32_table<A>::make_from_pairs(pairs.data(), num_pairs, lg_k);
346
+ }
347
+ }
348
+
349
+ template<typename A>
350
+ void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const {
351
+ const size_t k = 1 << lg_k;
352
+ const uint64_t num_base_bits = golomb_choose_number_of_base_bits(k + pairs.size(), pairs.size());
353
+ const uint64_t table_len = safe_length_for_compressed_pair_buf(k, pairs.size(), num_base_bits);
354
+ result.table_data.resize(table_len);
355
+
356
+ size_t csv_length = low_level_compress_pairs(pairs.data(), pairs.size(), num_base_bits, result.table_data.data());
357
+
358
+ // At this point we could free the unused portion of the compression output buffer,
359
+ // but it is not necessary if it is temporary
360
+ // Note: realloc caused strange timing spikes for lgK = 11 and 12.
361
+
362
+ result.table_data_words = csv_length;
363
+ result.table_num_entries = pairs.size();
364
+ }
365
+
366
+ template<typename A>
367
+ vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k) const {
368
+ const size_t k = 1 << lg_k;
369
+ vector_u32<A> pairs(num_pairs);
370
+ const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
371
+ low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
372
+ return pairs;
373
+ }
374
+
375
+ template<typename A>
376
+ void cpc_compressor<A>::compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const {
377
+ const size_t k = 1 << lg_k;
378
+ const size_t window_buf_len = safe_length_for_compressed_window_buf(k);
379
+ target.window_data.resize(window_buf_len);
380
+ const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
381
+ size_t data_words = low_level_compress_bytes(window, k, encoding_tables_for_high_entropy_byte[pseudo_phase], target.window_data.data());
382
+
383
+ // At this point we could free the unused portion of the compression output buffer,
384
+ // but it is not necessary if it is temporary
385
+ // Note: realloc caused strange timing spikes for lgK = 11 and 12.
386
+
387
+ target.window_data_words = data_words;
388
+ }
389
+
390
+ template<typename A>
391
+ void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const {
392
+ const size_t k = 1 << lg_k;
393
+ window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor)
394
+ const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
395
+ low_level_uncompress_bytes(window.data(), k, decoding_tables_for_high_entropy_byte[pseudo_phase], data, data_words);
396
+ }
397
+
398
+ template<typename A>
399
+ size_t cpc_compressor<A>::safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits) {
400
+ // Long ybits = k + numPairs; // simpler and safer UB
401
+ // The following tighter UB on ybits is based on page 198
402
+ // of the textbook "Managing Gigabytes" by Witten, Moffat, and Bell.
403
+ // Notice that if numBaseBits == 0 it coincides with (k + numPairs).
404
+ const size_t ybits = num_pairs * (1 + num_base_bits) + (k >> num_base_bits);
405
+ const size_t xbits = 12 * num_pairs;
406
+ const size_t padding = num_base_bits > 10 ? 0 : 10 - num_base_bits;
407
+ return divide_longs_rounding_up(xbits + ybits + padding, 32);
408
+ }
409
+
410
+ // Explanation of padding: we write
411
+ // 1) xdelta (huffman, provides at least 1 bit, requires 12-bit lookahead)
412
+ // 2) ydeltaGolombHi (unary, provides at least 1 bit, requires 8-bit lookahead)
413
+ // 3) ydeltaGolombLo (straight B bits).
414
+ // So the 12-bit lookahead is the tight constraint, but there are at least (2 + B) bits emitted,
415
+ // so we would be safe with max (0, 10 - B) bits of padding at the end of the bitstream.
416
+ template<typename A>
417
+ size_t cpc_compressor<A>::safe_length_for_compressed_window_buf(uint64_t k) { // measured in 32-bit words
418
+ const size_t bits = 12 * k + 11; // 11 bits of padding, due to 12-bit lookahead, with 1 bit certainly present.
419
+ return divide_longs_rounding_up(bits, 32);
420
+ }
421
+
422
+ template<typename A>
423
+ uint8_t cpc_compressor<A>::determine_pseudo_phase(uint8_t lg_k, uint64_t c) {
424
+ const size_t k = 1 << lg_k;
425
+ // This mid-range logic produces pseudo-phases. They are used to select encoding tables.
426
+ // The thresholds were chosen by hand after looking at plots of measured compression.
427
+ if (1000 * c < 2375 * k) {
428
+ if ( 4 * c < 3 * k) return 16 + 0; // mid-range table
429
+ else if ( 10 * c < 11 * k) return 16 + 1; // mid-range table
430
+ else if ( 100 * c < 132 * k) return 16 + 2; // mid-range table
431
+ else if ( 3 * c < 5 * k) return 16 + 3; // mid-range table
432
+ else if (1000 * c < 1965 * k) return 16 + 4; // mid-range table
433
+ else if (1000 * c < 2275 * k) return 16 + 5; // mid-range table
434
+ else return 6; // steady-state table employed before its actual phase
435
+ } else { // This steady-state logic produces true phases. They are used to select
436
+ // encoding tables, and also column permutations for the "Sliding" flavor.
437
+ if (lg_k < 4) throw std::logic_error("lgK < 4");
438
+ const size_t tmp = c >> (lg_k - 4);
439
+ const uint8_t phase = tmp & 15;
440
+ if (phase < 0 || phase >= 16) throw std::out_of_range("wrong phase");
441
+ return phase;
442
+ }
443
+ }
444
+
445
// If the bit buffer holds at least 32 bits, writes its low 32 bits to
// wordarr[wordindex], advances wordindex, and drops those bits from the buffer.
// Otherwise does nothing.
static inline void maybe_flush_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, uint32_t* wordarr, size_t& wordindex) {
  if (bufbits < 32) return;
  wordarr[wordindex] = bitbuf & 0xffffffff;
  wordindex++;
  bitbuf >>= 32;
  bufbits -= 32;
}
452
+
453
// If the bit buffer holds fewer than minbits bits, reads wordarr[wordindex], places it
// above the bits already buffered, and advances wordindex. Otherwise does nothing.
static inline void maybe_fill_bitbuf(uint64_t& bitbuf, uint8_t& bufbits, const uint32_t* wordarr, size_t& wordindex, uint8_t minbits) {
  if (bufbits >= minbits) return;
  const uint64_t next_word = static_cast<uint64_t>(wordarr[wordindex]);
  wordindex++;
  bitbuf |= next_word << bufbits;
  bufbits += 32;
}
459
+
460
+ // This returns the number of compressed words that were actually used.
461
+ // It is the caller's responsibility to ensure that the compressed_words array is long enough.
462
+ template<typename A>
463
+ size_t cpc_compressor<A>::low_level_compress_bytes(
464
+ const uint8_t* byte_array, // input
465
+ size_t num_bytes_to_encode,
466
+ const uint16_t* encoding_table,
467
+ uint32_t* compressed_words // output
468
+ ) const {
469
+ uint64_t bitbuf = 0; // bits are packed into this first, then are flushed to compressed_words
470
+ uint8_t bufbits = 0; // number of bits currently in bitbuf; must be between 0 and 31
471
+ size_t next_word_index = 0;
472
+
473
+ for (size_t byte_index = 0; byte_index < num_bytes_to_encode; byte_index++) {
474
+ const uint64_t code_info = encoding_table[byte_array[byte_index]];
475
+ const uint64_t code_val = code_info & 0xfff;
476
+ const int code_len = code_info >> 12;
477
+ bitbuf |= (code_val << bufbits);
478
+ bufbits += code_len;
479
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
480
+ }
481
+
482
+ // Pad the bitstream with 11 zero-bits so that the decompressor's 12-bit peek can't overrun its input.
483
+ bufbits += 11;
484
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
485
+
486
+ if (bufbits > 0) { // We are done encoding now, so we flush the bit buffer.
487
+ if (bufbits >= 32) throw std::logic_error("bufbits >= 32");
488
+ compressed_words[next_word_index++] = bitbuf & 0xffffffff;
489
+ bitbuf = 0; bufbits = 0; // not really necessary
490
+ }
491
+ return next_word_index;
492
+ }
493
+
494
+ template<typename A>
495
+ void cpc_compressor<A>::low_level_uncompress_bytes(
496
+ uint8_t* byte_array, // output
497
+ size_t num_bytes_to_decode,
498
+ const uint16_t* decoding_table,
499
+ const uint32_t* compressed_words, // input
500
+ size_t num_compressed_words
501
+ ) const {
502
+ size_t word_index = 0;
503
+ uint64_t bitbuf = 0;
504
+ uint8_t bufbits = 0;
505
+
506
+ if (byte_array == nullptr) throw std::logic_error("byte_array == NULL");
507
+ if (decoding_table == nullptr) throw std::logic_error("decoding_table == NULL");
508
+ if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
509
+
510
+ for (size_t byte_index = 0; byte_index < num_bytes_to_decode; byte_index++) {
511
+ maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12); // ensure 12 bits in bit buffer
512
+
513
+ const size_t peek12 = bitbuf & 0xfff; // These 12 bits will include an entire Huffman codeword.
514
+ const uint16_t lookup = decoding_table[peek12];
515
+ const uint8_t code_word_length = lookup >> 8;
516
+ const uint8_t decoded_byte = lookup & 0xff;
517
+ byte_array[byte_index] = decoded_byte;
518
+ bitbuf >>= code_word_length;
519
+ bufbits -= code_word_length;
520
+ }
521
+ // Buffer over-run should be impossible unless there is a bug.
522
+ // However, we might as well check here.
523
+ if (word_index > num_compressed_words) throw std::logic_error("word_index > num_compressed_words");
524
+ }
525
+
526
+ static inline uint64_t read_unary(
527
+ const uint32_t* compressed_words,
528
+ size_t& next_word_index,
529
+ uint64_t& bitbuf,
530
+ uint8_t& bufbits
531
+ );
532
+
533
+ static inline void write_unary(
534
+ uint32_t* compressed_words,
535
+ size_t& next_word_index_ptr,
536
+ uint64_t& bit_buf_ptr,
537
+ uint8_t& buf_bits_ptr,
538
+ uint64_t value
539
+ );
540
+
541
+ // Here "pairs" refers to row/column pairs that specify
542
+ // the positions of surprising values in the bit matrix.
543
+
544
+ // returns the number of compressed_words actually used
545
+ template<typename A>
546
+ size_t cpc_compressor<A>::low_level_compress_pairs(
547
+ const uint32_t* pair_array, // input
548
+ size_t num_pairs_to_encode,
549
+ size_t num_base_bits,
550
+ uint32_t* compressed_words // output
551
+ ) const {
552
+ uint64_t bitbuf = 0;
553
+ uint8_t bufbits = 0;
554
+ size_t next_word_index = 0;
555
+ const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
556
+ uint64_t predicted_row_index = 0;
557
+ uint16_t predicted_col_index = 0;
558
+
559
+ for (size_t pair_index = 0; pair_index < num_pairs_to_encode; pair_index++) {
560
+ const uint32_t row_col = pair_array[pair_index];
561
+ const uint64_t row_index = row_col >> 6;
562
+ const uint16_t col_index = row_col & 63;
563
+
564
+ if (row_index != predicted_row_index) predicted_col_index = 0;
565
+
566
+ if (row_index < predicted_row_index) throw std::logic_error("row_index < predicted_row_index");
567
+ if (col_index < predicted_col_index) throw std::logic_error("col_index < predicted_col_index");
568
+
569
+ const uint64_t y_delta = row_index - predicted_row_index;
570
+ const uint16_t x_delta = col_index - predicted_col_index;
571
+
572
+ predicted_row_index = row_index;
573
+ predicted_col_index = col_index + 1;
574
+
575
+ const uint64_t code_info = length_limited_unary_encoding_table65[x_delta];
576
+ const uint64_t code_val = code_info & 0xfff;
577
+ const uint8_t code_len = code_info >> 12;
578
+ bitbuf |= code_val << bufbits;
579
+ bufbits += code_len;
580
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
581
+
582
+ const uint64_t golomb_lo = y_delta & golomb_lo_mask;
583
+ const uint64_t golomb_hi = y_delta >> num_base_bits;
584
+
585
+ write_unary(compressed_words, next_word_index, bitbuf, bufbits, golomb_hi);
586
+
587
+ bitbuf |= golomb_lo << bufbits;
588
+ bufbits += num_base_bits;
589
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
590
+ }
591
+
592
+ // Pad the bitstream so that the decompressor's 12-bit peek can't overrun its input.
593
+ const uint8_t padding = (num_base_bits > 10) ? 0 : 10 - num_base_bits;
594
+ bufbits += padding;
595
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
596
+
597
+ if (bufbits > 0) { // We are done encoding now, so we flush the bit buffer
598
+ if (bufbits >= 32) throw std::logic_error("bufbits >= 32");
599
+ compressed_words[next_word_index++] = bitbuf & 0xffffffff;
600
+ bitbuf = 0; bufbits = 0; // not really necessary
601
+ }
602
+
603
+ return next_word_index;
604
+ }
605
+
606
+ template<typename A>
607
+ void cpc_compressor<A>::low_level_uncompress_pairs(
608
+ uint32_t* pair_array, // output
609
+ size_t num_pairs_to_decode,
610
+ size_t num_base_bits,
611
+ const uint32_t* compressed_words, // input
612
+ size_t num_compressed_words
613
+ ) const {
614
+ size_t word_index = 0;
615
+ uint64_t bitbuf = 0;
616
+ uint8_t bufbits = 0;
617
+ const uint64_t golomb_lo_mask = (1 << num_base_bits) - 1;
618
+ uint64_t predicted_row_index = 0;
619
+ uint16_t predicted_col_index = 0;
620
+
621
+ // for each pair we need to read:
622
+ // x_delta (12-bit length-limited unary)
623
+ // y_delta_hi (unary)
624
+ // y_delta_lo (basebits)
625
+
626
+ for (size_t pair_index = 0; pair_index < num_pairs_to_decode; pair_index++) {
627
+ maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, 12); // ensure 12 bits in bit buffer
628
+ const size_t peek12 = bitbuf & 0xfff;
629
+ const uint16_t lookup = length_limited_unary_decoding_table65[peek12];
630
+ const int code_word_length = lookup >> 8;
631
+ const int16_t x_delta = lookup & 0xff;
632
+ bitbuf >>= code_word_length;
633
+ bufbits -= code_word_length;
634
+
635
+ const uint64_t golomb_hi = read_unary(compressed_words, word_index, bitbuf, bufbits);
636
+
637
+ maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, word_index, num_base_bits); // ensure num_base_bits in bit buffer
638
+ const uint64_t golomb_lo = bitbuf & golomb_lo_mask;
639
+ bitbuf >>= num_base_bits;
640
+ bufbits -= num_base_bits;
641
+ const int64_t y_delta = (golomb_hi << num_base_bits) | golomb_lo;
642
+
643
+ // Now that we have x_delta and y_delta, we can compute the pair's row and column
644
+ if (y_delta > 0) predicted_col_index = 0;
645
+ const uint64_t row_index = predicted_row_index + y_delta;
646
+ const uint16_t col_index = predicted_col_index + x_delta;
647
+ const uint32_t row_col = (row_index << 6) | col_index;
648
+ pair_array[pair_index] = row_col;
649
+ predicted_row_index = row_index;
650
+ predicted_col_index = col_index + 1;
651
+ }
652
+ if (word_index > num_compressed_words) throw std::logic_error("word_index > num_compressed_words"); // check for buffer over-run
653
+ }
654
+
655
+ uint64_t read_unary(
656
+ const uint32_t* compressed_words,
657
+ size_t& next_word_index,
658
+ uint64_t& bitbuf,
659
+ uint8_t& bufbits
660
+ ) {
661
+ if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
662
+ size_t subtotal = 0;
663
+ while (true) {
664
+ maybe_fill_bitbuf(bitbuf, bufbits, compressed_words, next_word_index, 8); // ensure 8 bits in bit buffer
665
+
666
+ const uint8_t peek8 = bitbuf & 0xff; // These 8 bits include either all or part of the Unary codeword
667
+ const uint8_t trailing_zeros = byte_trailing_zeros_table[peek8];
668
+
669
+ if (trailing_zeros > 8) throw std::out_of_range("trailing_zeros out of range");
670
+ if (trailing_zeros < 8) {
671
+ bufbits -= 1 + trailing_zeros;
672
+ bitbuf >>= 1 + trailing_zeros;
673
+ return subtotal + trailing_zeros;
674
+ }
675
+ // The codeword was partial, so read some more
676
+ subtotal += 8;
677
+ bufbits -= 8;
678
+ bitbuf >>= 8;
679
+ }
680
+ }
681
+
682
+ void write_unary(
683
+ uint32_t* compressed_words,
684
+ size_t& next_word_index,
685
+ uint64_t& bitbuf,
686
+ uint8_t& bufbits,
687
+ uint64_t value
688
+ ) {
689
+ if (compressed_words == nullptr) throw std::logic_error("compressed_words == NULL");
690
+ if (bufbits > 31) throw std::out_of_range("bufbits out of range");
691
+
692
+ uint64_t remaining = value;
693
+
694
+ while (remaining >= 16) {
695
+ remaining -= 16;
696
+ // Here we output 16 zeros, but we don't need to physically write them into bitbuf
697
+ // because it already contains zeros in that region.
698
+ bufbits += 16; // Record the fact that 16 bits of output have occurred.
699
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
700
+ }
701
+
702
+ if (remaining > 15) throw std::out_of_range("remaining out of range");
703
+
704
+ const uint64_t the_unary_code = 1 << remaining;
705
+ bitbuf |= the_unary_code << bufbits;
706
+ bufbits += 1 + remaining;
707
+ maybe_flush_bitbuf(bitbuf, bufbits, compressed_words, next_word_index);
708
+ }
709
+
710
+ // The empty space that this leaves at the beginning of the output array
711
+ // will be filled in later by the caller.
712
+ template<typename A>
713
+ vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space) {
714
+ const size_t output_length = empty_space + num_pairs_to_get;
715
+ vector_u32<A> pairs(output_length);
716
+ size_t pair_index = empty_space;
717
+ for (unsigned row_index = 0; row_index < k; row_index++) {
718
+ uint8_t byte = window[row_index];
719
+ while (byte != 0) {
720
+ const uint8_t col_index = byte_trailing_zeros_table[byte];
721
+ byte = byte ^ (1 << col_index); // erase the 1
722
+ pairs[pair_index++] = (row_index << 6) | col_index;
723
+ }
724
+ }
725
+ if (pair_index != output_length) throw std::logic_error("pair_index != output_length");
726
+ return pairs;
727
+ }
728
+
729
+ // returns an integer that is between
730
+ // zero and ceiling(log_2(k)) - 1, inclusive
731
+ template<typename A>
732
+ uint64_t cpc_compressor<A>::golomb_choose_number_of_base_bits(uint64_t k, uint64_t count) {
733
+ if (k < 1) throw std::invalid_argument("golomb_choose_number_of_base_bits: k < 1");
734
+ if (count < 1) throw std::invalid_argument("golomb_choose_number_of_base_bits: count < 1");
735
+ const uint64_t quotient = (k - count) / count; // integer division
736
+ if (quotient == 0) return 0;
737
+ else return long_floor_log2_of_long(quotient);
738
+ }
739
+
740
+ } /* namespace datasketches */
741
+
742
+ #endif