datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,238 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ namespace datasketches {
21
+ // Constructs the update sketch by forwarding every argument, unchanged, to the Base constructor.
22
+ template<typename A>
23
+ update_array_of_doubles_sketch_alloc<A>::update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
24
+ uint64_t theta, uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator):
25
+ Base(lg_cur_size, lg_nom_size, rf, theta, seed, policy, allocator) {}
26
+
27
+ // Returns the value count reported by this sketch's update policy (policy_.get_num_values()).
28
+ template<typename A>
29
+ uint8_t update_array_of_doubles_sketch_alloc<A>::get_num_values() const {
30
+ return this->policy_.get_num_values();
31
+ }
32
+ // Builds a compact sketch from this one; `ordered` is passed straight through to the compact sketch constructor.
33
+ template<typename A>
34
+ compact_array_of_doubles_sketch_alloc<A> update_array_of_doubles_sketch_alloc<A>::compact(bool ordered) const {
35
+ return compact_array_of_doubles_sketch_alloc<A>(*this, ordered);
36
+ }
37
+
38
+ // builder
39
+ // Constructs a builder, handing the policy and allocator to the tuple_base_builder base class.
40
+ template<typename A>
41
+ update_array_of_doubles_sketch_alloc<A>::builder::builder(const array_of_doubles_update_policy<A>& policy, const A& allocator):
42
+ tuple_base_builder<builder, array_of_doubles_update_policy<A>, A>(policy, allocator) {}
43
+ // Creates an update sketch from the builder's current state: starting_lg_size(), lg_k_, rf_, starting_theta(), seed_, policy_ and allocator_.
44
+ template<typename A>
45
+ update_array_of_doubles_sketch_alloc<A> update_array_of_doubles_sketch_alloc<A>::builder::build() const {
46
+ return update_array_of_doubles_sketch_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
47
+ }
48
+
49
+ // compact sketch
50
+ // Converting constructor: builds Base from `other` and `ordered`, and takes the value count from other.get_num_values().
51
+ template<typename A>
52
+ template<typename S>
53
+ compact_array_of_doubles_sketch_alloc<A>::compact_array_of_doubles_sketch_alloc(const S& other, bool ordered):
54
+ Base(other, ordered), num_values_(other.get_num_values()) {}
55
+ // Constructs a compact sketch from already-decoded parts (as both deserialize overloads do); `entries` is moved into Base and num_values is stored in num_values_.
56
+ template<typename A>
57
+ compact_array_of_doubles_sketch_alloc<A>::compact_array_of_doubles_sketch_alloc(bool is_empty, bool is_ordered,
58
+ uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries, uint8_t num_values):
59
+ Base(is_empty, is_ordered, seed_hash, theta, std::move(entries)), num_values_(num_values) {}
60
+ // Constructs a compact sketch by moving an existing Base object in and recording num_values alongside it.
61
+ template<typename A>
62
+ compact_array_of_doubles_sketch_alloc<A>::compact_array_of_doubles_sketch_alloc(uint8_t num_values, Base&& base):
63
+ Base(std::move(base)), num_values_(num_values) {}
64
+ // Returns the stored num_values_ member.
65
+ template<typename A>
66
+ uint8_t compact_array_of_doubles_sketch_alloc<A>::get_num_values() const {
67
+ return num_values_;
68
+ }
69
+ // Writes the sketch to `os` as raw field bytes: preamble_longs, serial version, family, type, flags, num_values_, seed hash, theta, and (only when entries are retained) the entry count, 4 zero bytes, all keys, then all value arrays.
70
+ template<typename A>
71
+ void compact_array_of_doubles_sketch_alloc<A>::serialize(std::ostream& os) const {
72
+ const uint8_t preamble_longs = 1; // always written as 1 by this function
73
+ os.write(reinterpret_cast<const char*>(&preamble_longs), sizeof(preamble_longs));
74
+ const uint8_t serial_version = SERIAL_VERSION;
75
+ os.write(reinterpret_cast<const char*>(&serial_version), sizeof(serial_version));
76
+ const uint8_t family = SKETCH_FAMILY;
77
+ os.write(reinterpret_cast<const char*>(&family), sizeof(family));
78
+ const uint8_t type = SKETCH_TYPE;
79
+ os.write(reinterpret_cast<const char*>(&type), sizeof(type));
80
+ const uint8_t flags_byte( // one bit per condition, at the positions named in flags::
81
+ (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
82
+ (this->get_num_retained() > 0 ? 1 << flags::HAS_ENTRIES : 0) |
83
+ (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
84
+ );
85
+ os.write(reinterpret_cast<const char*>(&flags_byte), sizeof(flags_byte));
86
+ os.write(reinterpret_cast<const char*>(&num_values_), sizeof(num_values_));
87
+ const uint16_t seed_hash = this->get_seed_hash();
88
+ os.write(reinterpret_cast<const char*>(&seed_hash), sizeof(seed_hash));
89
+ os.write(reinterpret_cast<const char*>(&(this->theta_)), sizeof(uint64_t));
90
+ if (this->get_num_retained() > 0) { // the entry section is omitted entirely when nothing is retained
91
+ const uint32_t num_entries = this->entries_.size(); // entries_.size() is stored in a 32-bit field
92
+ os.write(reinterpret_cast<const char*>(&num_entries), sizeof(num_entries));
93
+ const uint32_t unused32 = 0; // 4 zero bytes written right after the count
94
+ os.write(reinterpret_cast<const char*>(&unused32), sizeof(unused32));
95
+ for (const auto& it: this->entries_) { // first pass: the 64-bit key (it.first) of every entry
96
+ os.write(reinterpret_cast<const char*>(&it.first), sizeof(uint64_t));
97
+ }
98
+ for (const auto& it: this->entries_) { // second pass: each entry's doubles (it.second), it.second.size() of them
99
+ os.write(reinterpret_cast<const char*>(it.second.data()), it.second.size() * sizeof(double));
100
+ }
101
+ }
102
+ }
103
+ // Serializes into a byte vector; the first header_size_bytes bytes are left zero-filled and the sketch image is written immediately after them, in the same field order as the stream overload.
104
+ template<typename A>
105
+ auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
106
+ const uint8_t preamble_longs = 1;
107
+ const size_t size = header_size_bytes + 16 // preamble and theta
108
+ + (this->entries_.size() > 0 ? 8 : 0) // num_entries + unused32, present only when there are entries
109
+ + (sizeof(uint64_t) + sizeof(double) * num_values_) * this->entries_.size(); // one 64-bit key plus num_values_ doubles per entry
110
+ vector_bytes bytes(size, 0, this->entries_.get_allocator()); // zero-initialized, using the entries' allocator
111
+ uint8_t* ptr = bytes.data() + header_size_bytes; // skip the caller-requested prefix
112
+ // Each copy_to_mem result is added to ptr, so ptr walks forward through the buffer (presumably by the number of bytes copied - confirm against copy_to_mem).
113
+ ptr += copy_to_mem(&preamble_longs, ptr, sizeof(preamble_longs));
114
+ const uint8_t serial_version = SERIAL_VERSION;
115
+ ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
116
+ const uint8_t family = SKETCH_FAMILY;
117
+ ptr += copy_to_mem(&family, ptr, sizeof(family));
118
+ const uint8_t type = SKETCH_TYPE;
119
+ ptr += copy_to_mem(&type, ptr, sizeof(type));
120
+ const uint8_t flags_byte( // one bit per condition, at the positions named in flags::
121
+ (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
122
+ (this->get_num_retained() ? 1 << flags::HAS_ENTRIES : 0) |
123
+ (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
124
+ );
125
+ ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
126
+ ptr += copy_to_mem(&num_values_, ptr, sizeof(num_values_));
127
+ const uint16_t seed_hash = this->get_seed_hash();
128
+ ptr += copy_to_mem(&seed_hash, ptr, sizeof(seed_hash));
129
+ ptr += copy_to_mem(&(this->theta_), ptr, sizeof(uint64_t));
130
+ if (this->get_num_retained() > 0) { // the entry section is omitted entirely when nothing is retained
131
+ const uint32_t num_entries = this->entries_.size(); // entries_.size() is stored in a 32-bit field
132
+ ptr += copy_to_mem(&num_entries, ptr, sizeof(num_entries));
133
+ const uint32_t unused32 = 0; // 4 zero bytes written right after the count
134
+ ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
135
+ for (const auto& it: this->entries_) { // first pass: the 64-bit key (it.first) of every entry
136
+ ptr += copy_to_mem(&it.first, ptr, sizeof(uint64_t));
137
+ }
138
+ for (const auto& it: this->entries_) { // second pass: each entry's doubles (it.second), it.second.size() of them
139
+ ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(double));
140
+ }
141
+ }
142
+ return bytes;
143
+ }
144
+ // Reads a compact sketch from `is`: runs the checker<true> validations on serial version, family and type, checks the seed hash only when HAS_ENTRIES is set, and throws std::runtime_error if the stream is not good once all reads are done.
145
+ template<typename A>
146
+ compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
147
+ uint8_t preamble_longs; // read to consume the byte; not referenced again in this function
148
+ is.read(reinterpret_cast<char*>(&preamble_longs), sizeof(preamble_longs));
149
+ uint8_t serial_version;
150
+ is.read(reinterpret_cast<char*>(&serial_version), sizeof(serial_version));
151
+ uint8_t family;
152
+ is.read(reinterpret_cast<char*>(&family), sizeof(family));
153
+ uint8_t type;
154
+ is.read(reinterpret_cast<char*>(&type), sizeof(type));
155
+ uint8_t flags_byte;
156
+ is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
157
+ uint8_t num_values;
158
+ is.read(reinterpret_cast<char*>(&num_values), sizeof(num_values));
159
+ uint16_t seed_hash;
160
+ is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
161
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
162
+ checker<true>::check_sketch_family(family, SKETCH_FAMILY);
163
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
164
+ const bool has_entries = flags_byte & (1 << flags::HAS_ENTRIES);
165
+ if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed)); // seed hash is not verified for sketches without entries
166
+
167
+ uint64_t theta;
168
+ is.read(reinterpret_cast<char*>(&theta), sizeof(theta));
169
+ std::vector<Entry, AllocEntry> entries(allocator);
170
+ if (has_entries) {
171
+ uint32_t num_entries;
172
+ is.read(reinterpret_cast<char*>(&num_entries), sizeof(num_entries));
173
+ uint32_t unused32; // read and ignored
174
+ is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
175
+ entries.reserve(num_entries);
176
+ std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator); // all keys precede the values in the stream, so they are buffered first
177
+ is.read(reinterpret_cast<char*>(keys.data()), num_entries * sizeof(uint64_t));
178
+ for (size_t i = 0; i < num_entries; ++i) { // then num_values doubles per entry, paired with keys[i]
179
+ aod<A> summary(num_values, allocator);
180
+ is.read(reinterpret_cast<char*>(summary.data()), num_values * sizeof(double));
181
+ entries.push_back(Entry(keys[i], std::move(summary)));
182
+ }
183
+ }
184
+ if (!is.good()) throw std::runtime_error("error reading from std::istream"); // NOTE(review): this is the only stream-state check; the header fields and num_entries are already used above, so a short read could feed uninitialized values into the checks and allocations - confirm this is acceptable
185
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
186
+ const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
187
+ return compact_array_of_doubles_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
188
+ }
189
+ // Reads a compact sketch from a buffer of `size` bytes, calling ensure_minimum_memory before each section: 16 bytes up front, 24 once entries are flagged, then 24 plus the full key/value payload.
190
+ template<typename A>
191
+ compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
192
+ ensure_minimum_memory(size, 16); // covers every field read before the has_entries branch, through theta
193
+ const char* ptr = static_cast<const char*>(bytes);
194
+ uint8_t preamble_longs; // read to advance ptr; not referenced again in this function
195
+ ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
196
+ uint8_t serial_version;
197
+ ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
198
+ uint8_t family;
199
+ ptr += copy_from_mem(ptr, &family, sizeof(family));
200
+ uint8_t type;
201
+ ptr += copy_from_mem(ptr, &type, sizeof(type));
202
+ uint8_t flags_byte;
203
+ ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
204
+ uint8_t num_values;
205
+ ptr += copy_from_mem(ptr, &num_values, sizeof(num_values));
206
+ uint16_t seed_hash;
207
+ ptr += copy_from_mem(ptr, &seed_hash, sizeof(seed_hash));
208
+ checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
209
+ checker<true>::check_sketch_family(family, SKETCH_FAMILY);
210
+ checker<true>::check_sketch_type(type, SKETCH_TYPE);
211
+ const bool has_entries = flags_byte & (1 << flags::HAS_ENTRIES);
212
+ if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed)); // seed hash is not verified for sketches without entries
213
+
214
+ uint64_t theta;
215
+ ptr += copy_from_mem(ptr, &theta, sizeof(theta));
216
+ std::vector<Entry, AllocEntry> entries(allocator);
217
+ if (has_entries) {
218
+ ensure_minimum_memory(size, 24); // the first 16 bytes plus num_entries and unused32
219
+ uint32_t num_entries;
220
+ ptr += copy_from_mem(ptr, &num_entries, sizeof(num_entries));
221
+ uint32_t unused32; // read and ignored
222
+ ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
223
+ ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(double) * num_values) * num_entries); // plus one key and num_values doubles per entry
224
+ entries.reserve(num_entries);
225
+ std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator); // all keys precede the values in the buffer, so they are copied out first
226
+ ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * num_entries);
227
+ for (size_t i = 0; i < num_entries; ++i) { // then num_values doubles per entry, paired with keys[i]
228
+ aod<A> summary(num_values, allocator);
229
+ ptr += copy_from_mem(ptr, summary.data(), num_values * sizeof(double));
230
+ entries.push_back(Entry(keys[i], std::move(summary)));
231
+ }
232
+ }
233
+ const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
234
+ const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
235
+ return compact_array_of_doubles_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
236
+ }
237
+
238
+ } /* namespace datasketches */
@@ -0,0 +1,81 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef ARRAY_OF_DOUBLES_UNION_HPP_
21
+ #define ARRAY_OF_DOUBLES_UNION_HPP_
22
+
23
+ #include <vector>
24
+ #include <memory>
25
+
26
+ #include "array_of_doubles_sketch.hpp"
27
+ #include "tuple_union.hpp"
28
+
29
+ namespace datasketches {
30
+
31
+ template<typename A = std::allocator<double>>
32
+ struct array_of_doubles_union_policy_alloc {
33
+ array_of_doubles_union_policy_alloc(uint8_t num_values = 1): num_values_(num_values) {}
34
+
35
+ void operator()(aod<A>& summary, const aod<A>& other) const {
36
+ for (size_t i = 0; i < summary.size(); ++i) {
37
+ summary[i] += other[i];
38
+ }
39
+ }
40
+
41
+ uint8_t get_num_values() const {
42
+ return num_values_;
43
+ }
44
+ private:
45
+ uint8_t num_values_;
46
+ };
47
+
48
+ using array_of_doubles_union_policy = array_of_doubles_union_policy_alloc<>;
49
+
50
+ template<typename Allocator = std::allocator<double>>
51
+ class array_of_doubles_union_alloc: public tuple_union<aod<Allocator>, array_of_doubles_union_policy_alloc<Allocator>, AllocAOD<Allocator>> {
52
+ public:
53
+ using Policy = array_of_doubles_union_policy_alloc<Allocator>;
54
+ using Base = tuple_union<aod<Allocator>, Policy, AllocAOD<Allocator>>;
55
+ using CompactSketch = compact_array_of_doubles_sketch_alloc<Allocator>;
56
+ using resize_factor = theta_constants::resize_factor;
57
+
58
+ class builder;
59
+
60
+ CompactSketch get_result(bool ordered = true) const;
61
+
62
+ private:
63
+ // for builder
64
+ array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
65
+ };
66
+
67
+ template<typename Allocator>
68
+ class array_of_doubles_union_alloc<Allocator>::builder: public tuple_base_builder<builder, array_of_doubles_union_policy_alloc<Allocator>, Allocator> {
69
+ public:
70
+ builder(const array_of_doubles_union_policy_alloc<Allocator>& policy = array_of_doubles_union_policy_alloc<Allocator>(), const Allocator& allocator = Allocator());
71
+ array_of_doubles_union_alloc<Allocator> build() const;
72
+ };
73
+
74
+ // alias with default allocator
75
+ using array_of_doubles_union = array_of_doubles_union_alloc<>;
76
+
77
+ } /* namespace datasketches */
78
+
79
+ #include "array_of_doubles_union_impl.hpp"
80
+
81
+ #endif
@@ -0,0 +1,43 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ namespace datasketches {
21
+
22
+ template<typename A>
23
+ array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
24
+ Base(lg_cur_size, lg_nom_size, rf, theta, seed, policy, allocator)
25
+ {}
26
+
27
+ template<typename A>
28
+ auto array_of_doubles_union_alloc<A>::get_result(bool ordered) const -> CompactSketch {
29
+ return compact_array_of_doubles_sketch_alloc<A>(this->state_.get_policy().get_policy().get_num_values(), Base::get_result(ordered));
30
+ }
31
+
32
+ // builder
33
+
34
+ template<typename A>
35
+ array_of_doubles_union_alloc<A>::builder::builder(const Policy& policy, const A& allocator):
36
+ tuple_base_builder<builder, Policy, A>(policy, allocator) {}
37
+
38
+ template<typename A>
39
+ array_of_doubles_union_alloc<A> array_of_doubles_union_alloc<A>::builder::build() const {
40
+ return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
41
+ }
42
+
43
+ } /* namespace datasketches */
@@ -0,0 +1,135 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef BOUNDS_ON_RATIOS_IN_SAMPLED_SETS_HPP_
21
+ #define BOUNDS_ON_RATIOS_IN_SAMPLED_SETS_HPP_
22
+
23
+ #include <cstdint>
24
+
25
+ #include <bounds_binomial_proportions.hpp>
26
+
27
+ namespace datasketches {
28
+
29
+ /**
30
+ * This class is used to compute the bounds on the estimate of the ratio <i>|B| / |A|</i>, where:
31
+ * <ul>
32
+ * <li><i>|A|</i> is the unknown size of a set <i>A</i> of unique identifiers.</li>
33
+ * <li><i>|B|</i> is the unknown size of a subset <i>B</i> of <i>A</i>.</li>
34
+ * <li><i>a</i> = <i>|S<sub>A</sub>|</i> is the observed size of a sample of <i>A</i>
35
+ * that was obtained by Bernoulli sampling with a known inclusion probability <i>f</i>.</li>
36
+ * <li><i>b</i> = <i>|S<sub>A</sub> &cap; B|</i> is the observed size of a subset
37
+ * of <i>S<sub>A</sub></i>.</li>
38
+ * </ul>
39
+ */
40
+ class bounds_on_ratios_in_sampled_sets {
41
+ public:
42
+ static constexpr double NUM_STD_DEVS = 2.0;
43
+
44
+ /**
45
+ * Return the approximate lower bound based on a 95% confidence interval
46
+ * @param a See class javadoc
47
+ * @param b See class javadoc
48
+ * @param f the inclusion probability used to produce the set with size <i>a</i> and should
49
+ * generally be less than 0.5. Above this value, the results not be reliable.
50
+ * When <i>f</i> = 1.0 this returns the estimate.
51
+ * @return the approximate upper bound
52
+ */
53
+ static double lower_bound_for_b_over_a(uint64_t a, uint64_t b, double f) {
54
+ check_inputs(a, b, f);
55
+ if (a == 0) return 0.0;
56
+ if (f == 1.0) return static_cast<double>(b) / static_cast<double>(a);
57
+ return bounds_binomial_proportions::approximate_lower_bound_on_p(a, b, NUM_STD_DEVS * hacky_adjuster(f));
58
+ }
59
+
60
+ /**
61
+ * Return the approximate upper bound based on a 95% confidence interval
62
+ * @param a See class javadoc
63
+ * @param b See class javadoc
64
+ * @param f the inclusion probability used to produce the set with size <i>a</i>.
65
+ * @return the approximate lower bound
66
+ */
67
+ static double upper_bound_for_b_over_a(uint64_t a, uint64_t b, double f) {
68
+ check_inputs(a, b, f);
69
+ if (a == 0) return 1.0;
70
+ if (f == 1.0) return static_cast<double>(b) / static_cast<double>(a);
71
+ return bounds_binomial_proportions::approximate_upper_bound_on_p(a, b, NUM_STD_DEVS * hacky_adjuster(f));
72
+ }
73
+
74
+ /**
75
+ * Return the estimate of b over a
76
+ * @param a See class javadoc
77
+ * @param b See class javadoc
78
+ * @return the estimate of b over a
79
+ */
80
+ static double get_estimate_of_b_over_a(uint64_t a, uint64_t b) {
81
+ check_inputs(a, b, 0.3);
82
+ if (a == 0) return 0.5;
83
+ return static_cast<double>(b) / static_cast<double>(a);
84
+ }
85
+
86
+ /**
87
+ * Return the estimate of A. See class javadoc.
88
+ * @param a See class javadoc
89
+ * @param f the inclusion probability used to produce the set with size <i>a</i>.
90
+ * @return the approximate lower bound
91
+ */
92
+ static double estimate_of_a(uint64_t a, uint64_t f) {
93
+ check_inputs(a, 1, f);
94
+ return a / f;
95
+ }
96
+
97
+ /**
98
+ * Return the estimate of B. See class javadoc.
99
+ * @param b See class javadoc
100
+ * @param f the inclusion probability used to produce the set with size <i>b</i>.
101
+ * @return the approximate lower bound
102
+ */
103
+ static double estimate_of_b(uint64_t b, double f) {
104
+ check_inputs(b + 1, b, f);
105
+ return b / f;
106
+ }
107
+
108
+ private:
109
+ /**
110
+ * This hackyAdjuster is tightly coupled with the width of the confidence interval normally
111
+ * specified with number of standard deviations. To simplify this interface the number of
112
+ * standard deviations has been fixed to 2.0, which corresponds to a confidence interval of
113
+ * 95%.
114
+ * @param f the inclusion probability used to produce the set with size <i>a</i>.
115
+ * @return the hacky Adjuster
116
+ */
117
+ static double hacky_adjuster(double f) {
118
+ const double tmp = sqrt(1.0 - f);
119
+ return (f <= 0.5) ? tmp : tmp + (0.01 * (f - 0.5));
120
+ }
121
+
122
+ static void check_inputs(uint64_t a, uint64_t b, double f) {
123
+ if (a < b) {
124
+ throw std::invalid_argument("a must be >= b: a = " + std::to_string(a) + ", b = " + std::to_string(b));
125
+ }
126
+ if ((f > 1.0) || (f <= 0.0)) {
127
+ throw std::invalid_argument("Required: ((f <= 1.0) && (f > 0.0)): " + std::to_string(f));
128
+ }
129
+ }
130
+
131
+ };
132
+
133
+ } /* namespace datasketches */
134
+
135
+ # endif