datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,91 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _COUPONLIST_HPP_
21
+ #define _COUPONLIST_HPP_
22
+
23
+ #include "HllSketchImpl.hpp"
24
+ #include "coupon_iterator.hpp"
25
+
26
+ #include <iostream>
27
+
28
+ namespace datasketches {
29
+
30
+ template<typename A> // forward declaration only; the factory is befriended by CouponList below
31
+ class HllSketchImplFactory;
32
+
33
+ template<typename A = std::allocator<char>> // A: allocator type, defaulting to std::allocator<char>
34
+ class CouponList : public HllSketchImpl<A> { // HllSketchImpl<A> subclass whose state is a raw int array of coupons plus a count and a flag (see members below)
35
+ public:
36
+ explicit CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode);
37
+ explicit CouponList(const CouponList& that); // copy constructor (declared explicit)
38
+ explicit CouponList(const CouponList& that, target_hll_type tgtHllType); // copy that takes a target type argument
39
+
40
+ static CouponList* newList(const void* bytes, size_t len); // factory from a memory buffer — presumably a serialized image; confirm in CouponList-internal.hpp
41
+ static CouponList* newList(std::istream& is); // factory from an input stream — presumably the same format; confirm
42
+ virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const; // serialize into a byte vector
43
+ virtual void serialize(std::ostream& os, bool compact) const; // serialize into an output stream
44
+
45
+ virtual ~CouponList();
46
+ virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const; // returns a callable taking an HllSketchImpl<A>*; NOTE(review): relies on <functional> being included transitively
47
+
48
+ virtual CouponList* copy() const;
49
+ virtual CouponList* copyAs(target_hll_type tgtHllType) const;
50
+
51
+ virtual HllSketchImpl<A>* couponUpdate(int coupon); // returns a base-class pointer — presumably because an update can promote to another impl; confirm
52
+
53
+ virtual double getEstimate() const;
54
+ virtual double getCompositeEstimate() const;
55
+ virtual double getUpperBound(int numStdDev) const;
56
+ virtual double getLowerBound(int numStdDev) const;
57
+
58
+ virtual bool isEmpty() const;
59
+ virtual int getCouponCount() const;
60
+
61
+ coupon_iterator<A> begin(bool all = false) const; // NOTE(review): meaning of 'all' is not visible in this header — see coupon_iterator
62
+ coupon_iterator<A> end() const;
63
+
64
+ protected:
65
+ typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc; // A rebound to allocate CouponList<A> objects
66
+
67
+ HllSketchImpl<A>* promoteHeapListToSet(CouponList& list);
68
+ HllSketchImpl<A>* promoteHeapListOrSetToHll(CouponList& src);
69
+
70
+ virtual int getUpdatableSerializationBytes() const;
71
+ virtual int getCompactSerializationBytes() const;
72
+ virtual int getMemDataStart() const;
73
+ virtual int getPreInts() const;
74
+ virtual bool isCompact() const;
75
+ virtual bool isOutOfOrderFlag() const;
76
+ virtual void putOutOfOrderFlag(bool oooFlag);
77
+
78
+ virtual int getLgCouponArrInts() const;
79
+ virtual int* getCouponIntArr() const;
80
+
81
+ int lgCouponArrInts; // presumably log2 of the length of couponIntArr, in ints — confirm
82
+ int couponCount;
83
+ bool oooFlag; // out-of-order flag (see isOutOfOrderFlag / putOutOfOrderFlag)
84
+ int* couponIntArr; // NOTE(review): raw pointer; allocation and release are not visible in this header
85
+
86
+ friend class HllSketchImplFactory<A>; // gives the factory access to the protected members above
87
+ };
88
+
89
+ }
90
+
91
+ #endif /* _COUPONLIST_HPP_ */
@@ -0,0 +1,233 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _CUBICINTERPOLATION_INTERNAL_HPP_
21
+ #define _CUBICINTERPOLATION_INTERNAL_HPP_
22
+
23
+ #include "CubicInterpolation.hpp"
24
+
25
+ #include <string>
26
+ #include <stdexcept>
27
+
28
+ namespace datasketches {
29
+
30
+ template<typename A> // forward declarations of the file-local helpers defined later in this header
31
+ static double interpolateUsingXAndYTables(const double xArr[], const double yArr[], const int offset, const double x); // cubic through the 4 table points starting at offset
32
+
33
+ template<typename A>
34
+ static double cubicInterpolate(const double x0, const double y0, const double x1, const double y1, // four (x, y) sample points ...
35
+ const double x2, const double y2, const double x3, const double y3, const double x); // ... and the x at which to evaluate
36
+
37
+ template<typename A>
38
+ static int findStraddle(const double xArr[], const int len, const double x); // index j with xArr[j] <= x < xArr[j+1]
39
+
40
+ template<typename A>
41
+ static int recursiveFindStraddle(const double xArr[], const int l, const int r, const double x); // binary-search step used by findStraddle
42
+
43
+ template<typename A>
44
+ static double interpolateUsingXArrAndYStride(const double xArr[], const double yStride, // same as the table variant, but y at index i is yStride * i
45
+ const int offset, const double x);
46
+
47
+ const int numEntries = 40; // length of both xArrComputed and yArrComputed below
48
+
49
+ //Computed for Coupon lgK = 26 ONLY. Designed for the cubic interpolator function. These are the x values (ascending) of the built-in table.
50
+ const double xArrComputed[numEntries] = {
51
+ 0.0, 1.0, 20.0, 400.0,
52
+ 8000.0, 160000.0, 300000.0, 600000.0,
53
+ 900000.0, 1200000.0, 1500000.0, 1800000.0,
54
+ 2100000.0, 2400000.0, 2700000.0, 3000000.0,
55
+ 3300000.0, 3600000.0, 3900000.0, 4200000.0,
56
+ 4500000.0, 4800000.0, 5100000.0, 5400000.0,
57
+ 5700000.0, 6000000.0, 6300000.0, 6600000.0,
58
+ 6900000.0, 7200000.0, 7500000.0, 7800000.0,
59
+ 8100000.0, 8400000.0, 8700000.0, 9000000.0,
60
+ 9300000.0, 9600000.0, 9900000.0, 10200000.0
61
+ };
62
+
63
+ //Computed for Coupon lgK = 26 ONLY. Designed for the cubic interpolator function. These are the y values, paired index-by-index with xArrComputed.
64
+ const double yArrComputed[numEntries] = {
65
+ 0.0000000000000000, 1.0000000000000000, 20.0000009437402611, 400.0003963713384110,
66
+ 8000.1589294602090376, 160063.6067763759638183, 300223.7071597663452849, 600895.5933856170158833,
67
+ 902016.8065120954997838, 1203588.4983199508860707, 1505611.8245524743106216, 1808087.9449319066479802,
68
+ 2111018.0231759352609515, 2414403.2270142501220107, 2718244.7282051891088486, 3022543.7025524540804327,
69
+ 3327301.3299219091422856, 3632518.7942584538832307, 3938197.2836029687896371, 4244337.9901093561202288,
70
+ 4550942.1100616492331028, 4858010.8438911894336343, 5165545.3961938973516226, 5473546.9757476449012756,
71
+ 5782016.7955296505242586, 6090956.0727340159937739, 6400366.0287892958149314, 6710247.8893762007355690,
72
+ 7020602.8844453142955899, 7331432.2482349723577499, 7642737.2192891482263803, 7954519.0404754765331745,
73
+ 8266778.9590033423155546, 8579518.2264420464634895, 8892738.0987390466034412, 9206439.8362383283674717,
74
+ 9520624.7036988288164139, 9835293.9703129194676876, 10150448.9097250290215015, 10466090.8000503256917000
75
+ };
76
+
77
+ template<typename A> // Convenience overload: interpolates at x using the built-in tables of this header.
78
+ double CubicInterpolation<A>::usingXAndYTables(const double x) { // x must lie within [xArrComputed[0], xArrComputed[numEntries-1]]; otherwise the callee throws std::invalid_argument
79
+ return usingXAndYTables(xArrComputed, yArrComputed, numEntries, x); // delegates to the four-argument overload
80
+ }
81
+
82
+ template<typename A> // Interpolates y at x over caller-supplied tables xArr / yArr (len entries each) using a four-point cubic.
83
+ double CubicInterpolation<A>::usingXAndYTables(const double xArr[], const double yArr[], // xArr is binary-searched, so it is expected to be ascending
84
+ const int len, const double x) { // returns the interpolated y; throws std::invalid_argument / std::logic_error as noted below
85
+ int offset; // NOTE: four consecutive table entries are read, so len is assumed to be >= 4 (not validated here)
86
+ if (x < xArr[0] || x > xArr[len-1]) { // x outside the table range -> std::invalid_argument
87
+ throw std::invalid_argument("x value out of range: " + std::to_string(x));
88
+ }
89
+
90
+ if (x == xArr[len-1]) { // corner case: findStraddle needs x < xArr[len-1], so the last entry is answered directly
91
+ return (yArr[len-1]);
92
+ }
93
+
94
+ offset = findStraddle<A>(xArr, len, x); // index j with xArr[j] <= x < xArr[j+1]
95
+ if (offset < 0 || offset > len-2) { // either bound violated is an error (the previous '&&' could never be true)
96
+ throw std::logic_error("offset must be >= 0 and <= " + std::to_string(len) + "-2");
97
+ }
98
+
99
+ if (offset == 0) { // corner case: no entry to the left, use entries 0..3
100
+ return (interpolateUsingXAndYTables<A>(xArr, yArr, (offset-0), x));
101
+ }
102
+ else if (offset == len-2) { // corner case: last interval of THIS table (len, not the built-in numEntries), use the final four entries
103
+ return (interpolateUsingXAndYTables<A>(xArr, yArr, (offset-2), x));
104
+ }
105
+ else { // main case: one entry to the left of the straddle and two to the right
106
+ return (interpolateUsingXAndYTables<A>(xArr, yArr, (offset-1), x));
107
+ }
108
+ throw std::logic_error("Exception should be unreachable"); // not reached: every branch above returns
109
+ }
110
+
111
+ // In C: again-two-registers cubic_interpolate_aux L1368. Evaluates the cubic through the four table points at indices offset..offset+3.
112
+ template<typename A>
113
+ static double interpolateUsingXAndYTables(const double xArr[], const double yArr[], // both arrays must have valid entries at offset..offset+3 (no bounds check here)
114
+ const int offset, const double x) { // x: the point at which the cubic is evaluated
115
+ return (cubicInterpolate<A>(xArr[offset+0], yArr[offset+0],
116
+ xArr[offset+1], yArr[offset+1],
117
+ xArr[offset+2], yArr[offset+2],
118
+ xArr[offset+3], yArr[offset+3],
119
+ x) );
120
+ }
121
+
122
+ template<typename A> // Evaluates at x the Lagrange-form cubic passing through (x0,y0), (x1,y1), (x2,y2), (x3,y3).
123
+ static inline double cubicInterpolate(const double x0, const double y0, // the x0..x3 values must be pairwise distinct, otherwise a denominator below is zero
124
+ const double x1, const double y1,
125
+ const double x2, const double y2,
126
+ const double x3, const double y3,
127
+ const double x)
128
+ {
129
+ double l0_numer = (x - x1) * (x - x2) * (x - x3); // numerator of basis polynomial i: product of (x - xj) over j != i
130
+ double l1_numer = (x - x0) * (x - x2) * (x - x3);
131
+ double l2_numer = (x - x0) * (x - x1) * (x - x3);
132
+ double l3_numer = (x - x0) * (x - x1) * (x - x2);
133
+
134
+ double l0_denom = (x0 - x1) * (x0 - x2) * (x0 - x3); // denominator of basis polynomial i: product of (xi - xj) over j != i
135
+ double l1_denom = (x1 - x0) * (x1 - x2) * (x1 - x3);
136
+ double l2_denom = (x2 - x0) * (x2 - x1) * (x2 - x3);
137
+ double l3_denom = (x3 - x0) * (x3 - x1) * (x3 - x2);
138
+
139
+ double term0 = y0 * l0_numer / l0_denom; // each term is yi weighted by its basis polynomial evaluated at x
140
+ double term1 = y1 * l1_numer / l1_denom;
141
+ double term2 = y2 * l2_numer / l2_denom;
142
+ double term3 = y3 * l3_numer / l3_denom;
143
+
144
+ return (term0 + term1 + term2 + term3); // the interpolated value is the sum of the four terms
145
+ }
146
+
147
+ /* returns j such that xArr[j] <= x and x < xArr[j+1]; throws std::logic_error if len < 2 or x lies outside [xArr[0], xArr[len-1]] */
148
+ template<typename A>
149
+ static int findStraddle(const double xArr[], const int len, const double x)
150
+ {
151
+ if ((len < 2) || (x < xArr[0]) || (x > xArr[len-1])) { // len is tested first, so xArr[len-1] is only read when len >= 2
152
+ throw std::logic_error("invariant violated during interpolation");
153
+ }
154
+ return(recursiveFindStraddle<A>(xArr, 0, len-1, x)); // NOTE: x == xArr[len-1] passes the guard above but is rejected by the search (it requires x < xArr[r]); both callers in this file return early for that case
155
+ }
156
+
157
+
158
+ /* the invariant here is that xArr[l] <= x && x < xArr[r]; returns the index j in [l, r-1] with xArr[j] <= x < xArr[j+1], found by bisection */
159
+ template<typename A>
160
+ static int recursiveFindStraddle(const double xArr[], const int l, const int r, const double x)
161
+ {
162
+ int m; // midpoint index, assigned once the interval is known to span more than one step
163
+ if (l >= r) { // an empty or inverted interval is a caller error
164
+ throw std::logic_error("lower bound not less than upper bound in search");
165
+ }
166
+ if ((xArr[l] > x) || (x >= xArr[r])) { // the invariant
167
+ throw std::logic_error("target value invariant violated in search");
168
+ }
169
+
170
+ if (l+1 == r) return (l); // adjacent indices: l is the straddle
171
+ m = l + ((r-l)/2); // midpoint of [l, r]
172
+ if (xArr[m] <= x) return (recursiveFindStraddle<A>(xArr, m, r, x)); // x is at or to the right of the midpoint: keep [m, r]
173
+ else return (recursiveFindStraddle<A>(xArr, l, m, x)); // x is to the left of the midpoint: keep [l, m]
174
+ }
175
+
176
+
177
+ //Interpolate using X table and Y stride
178
+
179
+ /**
180
+ * Cubic interpolation using interpolation X table and Y stride: the y value at index i is taken to be yStride * i.
181
+ * @param xArr The x array; it is binary-searched, so it is expected to be ascending
182
+ * @param xArrLen the length of xArr; must be at least 4
183
+ * @param yStride The y stride, i.e. the spacing between the implied y values of consecutive entries
184
+ * @param x The value x; must lie within [xArr[0], xArr[xArrLen-1]]
185
+ * @return cubic interpolation of y at x (exactly yStride * (xArrLen-1) when x equals the last entry)
186
+ * @throws std::logic_error if xArrLen < 4, if x is out of range, or if the straddle offset is invalid
187
+ */
188
+ //In C: again-two-registers cubic_interpolate_with_x_arr_and_y_stride L1411
189
+ // Used by HllEstimators
190
+ template<typename A>
191
+ double CubicInterpolation<A>::usingXArrAndYStride(const double xArr[], const int xArrLen,
192
+ const double yStride, const double x) {
193
+ const int xArrLenM1 = xArrLen - 1; // index of the last entry
194
+
195
+ if ((xArrLen < 4) || (x < xArr[0]) || (x > xArr[xArrLenM1])) { // the length is tested first, so xArr[xArrLenM1] is only read when xArrLen >= 4
196
+ throw std::logic_error("impossible values during interpolation");
197
+ }
198
+
199
+ if (x == xArr[xArrLenM1]) { /* corner case: findStraddle needs x < last entry, so answer directly */
200
+ return (yStride * (xArrLenM1));
201
+ }
202
+
203
+ const int offset = findStraddle<A>(xArr, xArrLen, x); //uses recursion
204
+ const int xArrLenM2 = xArrLen - 2; // largest valid straddle index
205
+ if ((offset < 0) || (offset > xArrLenM2)) {
206
+ throw std::logic_error("invalid offset during interpolation");
207
+ }
208
+
209
+ if (offset == 0) { /* corner case: no entry to the left, use entries 0..3 */
210
+ return (interpolateUsingXArrAndYStride<A>(xArr, yStride, (offset - 0), x));
211
+ }
212
+ else if (offset == xArrLenM2) { /* corner case: last interval, use the final four entries */
213
+ return (interpolateUsingXArrAndYStride<A>(xArr, yStride, (offset - 2), x));
214
+ }
215
+ /* main case: one entry to the left of the straddle and two to the right */
216
+ return (interpolateUsingXArrAndYStride<A>(xArr, yStride, (offset - 1), x));
217
+ }
218
+
219
+ //In C: again-two-registers cubic_interpolate_with_x_arr_and_y_stride_aux L1402. Evaluates the cubic through the four points at indices offset..offset+3.
220
+ template<typename A>
221
+ static double interpolateUsingXArrAndYStride(const double xArr[], const double yStride, // xArr must have valid entries at offset..offset+3 (no bounds check here)
222
+ const int offset, const double x) { // x: the point at which the cubic is evaluated
223
+ return cubicInterpolate<A>(
224
+ xArr[offset + 0], yStride * (offset + 0), // the y value for index i is yStride * i; there is no y array
225
+ xArr[offset + 1], yStride * (offset + 1),
226
+ xArr[offset + 2], yStride * (offset + 2),
227
+ xArr[offset + 3], yStride * (offset + 3),
228
+ x);
229
+ }
230
+
231
+ }
232
+
233
+ #endif // _CUBICINTERPOLATION_INTERNAL_HPP_
@@ -0,0 +1,43 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _CUBICINTERPOLATION_HPP_
21
+ #define _CUBICINTERPOLATION_HPP_
22
+
23
+ #include <memory>
24
+
25
+ namespace datasketches {
26
+
27
/**
 * Static cubic-interpolation entry points. The class holds no state; every
 * member is a static function. The template parameter A defaults to
 * std::allocator<char>.
 */
template<typename A = std::allocator<char>>
class CubicInterpolation {
  public:
    /**
     * Interpolates at x using caller-supplied x and y tables.
     * @param xArr the x table
     * @param yArr the y table
     * @param len table length (presumably shared by xArr and yArr - confirm in the definition)
     * @param x the value to interpolate at
     */
    static double usingXAndYTables(const double xArr[], const double yArr[], int len, double x);

    /**
     * Single-argument overload; the tables it uses are chosen by the definition.
     * @param x the value to interpolate at
     */
    static double usingXAndYTables(double x);

    /**
     * Interpolates at x using an x table and a constant y stride.
     * @param xArr the x table
     * @param xArrLen the length of xArr
     * @param yStride the y stride
     * @param x the value to interpolate at
     */
    static double usingXArrAndYStride(const double xArr[], const int xArrLen, double yStride, double x);
};
38
+
39
+ }
40
+
41
+ #include "CubicInterpolation-internal.hpp"
42
+
43
+ #endif /* _CUBICINTERPOLATION_HPP_ */
@@ -0,0 +1,90 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _HARMONICNUMBERS_INTERNAL_HPP_
21
+ #define _HARMONICNUMBERS_INTERNAL_HPP_
22
+
23
+ #include "HarmonicNumbers.hpp"
24
+
25
+ #include <cmath>
26
+
27
+ namespace datasketches {
28
+
29
+ template<typename A>
30
+ double HarmonicNumbers<A>::getBitMapEstimate(const int bitVectorLength, const int numBitsSet) {
31
+ return (bitVectorLength * (harmonicNumber(bitVectorLength) - harmonicNumber(bitVectorLength - numBitsSet)));
32
+ }
33
+
34
// Number of entries in tableOfExactHarmonicNumbers (H_0 through H_24).
static const int NUM_EXACT_HARMONIC_NUMBERS = 25;

// Exact harmonic numbers H_n = 1 + 1/2 + ... + 1/n, indexed by n, written as
// reduced fractions. The table is read-only lookup data, hence const.
static const double tableOfExactHarmonicNumbers[] = {
  0.0,                        // 0
  1.0,                        // 1
  1.5,                        // 2
  11.0 / 6.0,                 // 3
  25.0 / 12.0,                // 4
  137.0 / 60.0,               // 5
  49.0 / 20.0,                // 6
  363.0 / 140.0,              // 7
  761.0 / 280.0,              // 8
  7129.0 / 2520.0,            // 9
  7381.0 / 2520.0,            // 10
  83711.0 / 27720.0,          // 11
  86021.0 / 27720.0,          // 12
  1145993.0 / 360360.0,       // 13
  1171733.0 / 360360.0,       // 14
  1195757.0 / 360360.0,       // 15
  2436559.0 / 720720.0,       // 16
  42142223.0 / 12252240.0,    // 17
  14274301.0 / 4084080.0,     // 18
  275295799.0 / 77597520.0,   // 19
  55835135.0 / 15519504.0,    // 20
  18858053.0 / 5173168.0,     // 21
  19093197.0 / 5173168.0,     // 22
  444316699.0 / 118982864.0,  // 23
  1347822955.0 / 356948592.0  // 24
};

// Keep the advertised count and the actual table length in lock-step, since
// the count is what guards indexing into the table.
static_assert(sizeof(tableOfExactHarmonicNumbers) / sizeof(tableOfExactHarmonicNumbers[0])
                  == NUM_EXACT_HARMONIC_NUMBERS,
              "tableOfExactHarmonicNumbers must have NUM_EXACT_HARMONIC_NUMBERS entries");

// Euler-Mascheroni constant (gamma), used by the series expansion for large arguments.
static const double EULER_MASCHERONI_CONSTANT = 0.577215664901532860606512090082;
65
+
66
+ template<typename A>
67
+ double HarmonicNumbers<A>::harmonicNumber(const uint64_t x_i) {
68
+ if (x_i < NUM_EXACT_HARMONIC_NUMBERS) {
69
+ return tableOfExactHarmonicNumbers[x_i];
70
+ } else {
71
+ double x = x_i;
72
+ double invSq = 1.0 / (x * x);
73
+ double sum = log(x) + EULER_MASCHERONI_CONSTANT + (1.0 / (2.0 * x));
74
+ /* note: the number of terms included from this series expansion is appropriate
75
+ for the size of the exact table (25) and the precision of doubles */
76
+ double pow = invSq; // now n^-2
77
+ sum -= pow * (1.0 / 12.0);
78
+ pow *= invSq; // now n^-4
79
+ sum += pow * (1.0 / 120.0);
80
+ pow *= invSq; /* now n^-6 */
81
+ sum -= pow * (1.0 / 252.0);
82
+ pow *= invSq; /* now n^-8 */
83
+ sum += pow * (1.0 / 240.0);
84
+ return sum;
85
+ }
86
+ }
87
+
88
+ }
89
+
90
+ #endif // _HARMONICNUMBERS_INTERNAL_HPP_