datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,250 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+
22
+ #include <theta_a_not_b_experimental.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ // These tests have been copied from the existing theta sketch implementation.
27
+
28
+ using update_theta_sketch = update_theta_sketch_experimental<>; // short name for the experimental update sketch with default template arguments
29
+ using compact_theta_sketch = compact_theta_sketch_experimental<>; // short name for the experimental compact sketch with default template arguments
30
+ using theta_a_not_b = theta_a_not_b_experimental<>; // short name for the experimental A-not-B operator with default template arguments
31
+
32
+ TEST_CASE("theta a-not-b: empty", "[theta_a_not_b]") { // A-not-B of two sketches that were built but never updated
33
+ theta_a_not_b a_not_b;
34
+ auto a = update_theta_sketch::builder().build();
35
+ auto b = update_theta_sketch::builder().build();
36
+ compact_theta_sketch result = a_not_b.compute(a, b);
37
+ REQUIRE(result.get_num_retained() == 0); // the result must hold no entries...
38
+ REQUIRE(result.is_empty()); // ...and report itself as empty
39
+ REQUIRE_FALSE(result.is_estimation_mode());
40
+ REQUIRE(result.get_estimate() == 0.0);
41
+ }
42
+
43
+ TEST_CASE("theta a-not-b: non empty no retained keys", "[theta_a_not_b]") { // B is built with p = 0.001; the result is checked before and after B's single update
44
+ update_theta_sketch a = update_theta_sketch::builder().build();
45
+ a.update(1);
46
+ update_theta_sketch b = update_theta_sketch::builder().set_p(0.001).build();
47
+ theta_a_not_b a_not_b;
48
+
49
+ // B is still empty (not updated yet): the result is expected to keep A's single entry with theta ~1
50
+ compact_theta_sketch result = a_not_b.compute(a, b);
51
+ REQUIRE_FALSE(result.is_empty());
52
+ REQUIRE_FALSE(result.is_estimation_mode());
53
+ REQUIRE(result.get_num_retained() == 1);
54
+ REQUIRE(result.get_theta() == Approx(1).margin(1e-10));
55
+ REQUIRE(result.get_estimate() == 1.0);
56
+
57
+ // B has now been updated once but retains no entries (asserted on the next check)
58
+ b.update(1);
59
+ REQUIRE(b.get_num_retained() == 0U);
60
+ // the result is now expected to be non-empty, in estimation mode, with theta ~0.001 and nothing retained
61
+ result = a_not_b.compute(a, b);
62
+ REQUIRE_FALSE(result.is_empty());
63
+ REQUIRE(result.is_estimation_mode());
64
+ REQUIRE(result.get_num_retained() == 0);
65
+ REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
66
+ REQUIRE(result.get_estimate() == 0.0);
67
+ }
68
+
69
+ TEST_CASE("theta a-not-b: exact mode half overlap", "[theta_a_not_b]") { // A = 0..999, B = 500..1499, so A-not-B is expected to hold exactly 500 values
70
+ update_theta_sketch a = update_theta_sketch::builder().build();
71
+ int value = 0;
72
+ for (int i = 0; i < 1000; i++) a.update(value++); // feeds 0..999 into A
73
+
74
+ update_theta_sketch b = update_theta_sketch::builder().build();
75
+ value = 500;
76
+ for (int i = 0; i < 1000; i++) b.update(value++); // feeds 500..1499 into B
77
+
78
+ theta_a_not_b a_not_b;
79
+
80
+ // unordered inputs, ordered result (two-argument compute)
81
+ compact_theta_sketch result = a_not_b.compute(a, b);
82
+ REQUIRE_FALSE(result.is_empty());
83
+ REQUIRE_FALSE(result.is_estimation_mode());
84
+ REQUIRE(result.is_ordered());
85
+ REQUIRE(result.get_estimate() == 500.0);
86
+
87
+ // unordered inputs, unordered result (third argument false; presumably the "ordered" flag - confirm against the header)
88
+ result = a_not_b.compute(a, b, false);
89
+ REQUIRE_FALSE(result.is_empty());
90
+ REQUIRE_FALSE(result.is_estimation_mode());
91
+ REQUIRE_FALSE(result.is_ordered());
92
+ REQUIRE(result.get_estimate() == 500.0);
93
+
94
+ // ordered inputs (both sketches are passed through compact())
95
+ result = a_not_b.compute(a.compact(), b.compact());
96
+ REQUIRE_FALSE(result.is_empty());
97
+ REQUIRE_FALSE(result.is_estimation_mode());
98
+ REQUIRE(result.is_ordered());
99
+ REQUIRE(result.get_estimate() == 500.0);
100
+
101
+ // A is ordered, so the result is ordered regardless of the false third argument
102
+ result = a_not_b.compute(a.compact(), b, false);
103
+ REQUIRE_FALSE(result.is_empty());
104
+ REQUIRE_FALSE(result.is_estimation_mode());
105
+ REQUIRE(result.is_ordered());
106
+ REQUIRE(result.get_estimate() == 500.0);
107
+ }
108
+
109
+ TEST_CASE("theta a-not-b: exact mode disjoint", "[theta_a_not_b]") { // A = 0..999, B = 1000..1999: nothing is removed, so the estimate must stay 1000
110
+ update_theta_sketch a = update_theta_sketch::builder().build();
111
+ int value = 0;
112
+ for (int i = 0; i < 1000; i++) a.update(value++); // feeds 0..999 into A
113
+
114
+ update_theta_sketch b = update_theta_sketch::builder().build();
115
+ for (int i = 0; i < 1000; i++) b.update(value++); // value is not reset, so B receives 1000..1999
116
+
117
+ theta_a_not_b a_not_b;
118
+
119
+ // unordered inputs
120
+ compact_theta_sketch result = a_not_b.compute(a, b);
121
+ REQUIRE_FALSE(result.is_empty());
122
+ REQUIRE_FALSE(result.is_estimation_mode());
123
+ REQUIRE(result.get_estimate() == 1000.0);
124
+
125
+ // ordered inputs (both sketches are passed through compact())
126
+ result = a_not_b.compute(a.compact(), b.compact());
127
+ REQUIRE_FALSE(result.is_empty());
128
+ REQUIRE_FALSE(result.is_estimation_mode());
129
+ REQUIRE(result.get_estimate() == 1000.0);
130
+ }
131
+
132
+ TEST_CASE("theta a-not-b: exact mode full overlap", "[theta_a_not_b]") { // the same 1000-value sketch is used as both A and B
133
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
134
+ int value = 0;
135
+ for (int i = 0; i < 1000; i++) sketch.update(value++); // feeds 0..999
136
+
137
+ theta_a_not_b a_not_b;
138
+
139
+ // unordered inputs: the result is expected to report itself as empty
140
+ compact_theta_sketch result = a_not_b.compute(sketch, sketch);
141
+ REQUIRE(result.is_empty());
142
+ REQUIRE_FALSE(result.is_estimation_mode());
143
+ REQUIRE(result.get_estimate() == 0.0);
144
+
145
+ // ordered inputs (both arguments are passed through compact())
146
+ result = a_not_b.compute(sketch.compact(), sketch.compact());
147
+ REQUIRE(result.is_empty());
148
+ REQUIRE_FALSE(result.is_estimation_mode());
149
+ REQUIRE(result.get_estimate() == 0.0);
150
+ }
151
+
152
+ TEST_CASE("theta a-not-b: estimation mode half overlap", "[theta_a_not_b]") { // A = 0..9999, B = 5000..14999; the estimate must be 5000 within a 2% margin
153
+ update_theta_sketch a = update_theta_sketch::builder().build();
154
+ int value = 0;
155
+ for (int i = 0; i < 10000; i++) a.update(value++); // feeds 0..9999 into A
156
+
157
+ update_theta_sketch b = update_theta_sketch::builder().build();
158
+ value = 5000;
159
+ for (int i = 0; i < 10000; i++) b.update(value++); // feeds 5000..14999 into B
160
+
161
+ theta_a_not_b a_not_b;
162
+
163
+ // unordered inputs
164
+ compact_theta_sketch result = a_not_b.compute(a, b);
165
+ REQUIRE_FALSE(result.is_empty());
166
+ REQUIRE(result.is_estimation_mode());
167
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02)); // absolute tolerance of 100
168
+
169
+ // ordered inputs (both sketches are passed through compact())
170
+ result = a_not_b.compute(a.compact(), b.compact());
171
+ REQUIRE_FALSE(result.is_empty());
172
+ REQUIRE(result.is_estimation_mode());
173
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02)); // absolute tolerance of 100
174
+ }
175
+
176
+ TEST_CASE("theta a-not-b: estimation mode disjoint", "[theta_a_not_b]") { // A = 0..9999, B = 10000..19999; the estimate must be 10000 within a 2% margin
177
+ update_theta_sketch a = update_theta_sketch::builder().build();
178
+ int value = 0;
179
+ for (int i = 0; i < 10000; i++) a.update(value++); // feeds 0..9999 into A
180
+
181
+ update_theta_sketch b = update_theta_sketch::builder().build();
182
+ for (int i = 0; i < 10000; i++) b.update(value++); // value is not reset, so B receives 10000..19999
183
+
184
+ theta_a_not_b a_not_b;
185
+
186
+ // unordered inputs
187
+ compact_theta_sketch result = a_not_b.compute(a, b);
188
+ REQUIRE_FALSE(result.is_empty());
189
+ REQUIRE(result.is_estimation_mode());
190
+ REQUIRE(result.get_estimate() == Approx(10000).margin(10000 * 0.02)); // absolute tolerance of 200
191
+
192
+ // ordered inputs (both sketches are passed through compact())
193
+ result = a_not_b.compute(a.compact(), b.compact());
194
+ REQUIRE_FALSE(result.is_empty());
195
+ REQUIRE(result.is_estimation_mode());
196
+ REQUIRE(result.get_estimate() == Approx(10000).margin(10000 * 0.02)); // absolute tolerance of 200
197
+ }
198
+
199
+ TEST_CASE("theta a-not-b: estimation mode full overlap", "[theta_a_not_b]") { // the same 10000-value sketch is used as both A and B
200
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
201
+ int value = 0;
202
+ for (int i = 0; i < 10000; i++) sketch.update(value++); // feeds 0..9999
203
+
204
+ theta_a_not_b a_not_b;
205
+
206
+ // unordered inputs: unlike the 1000-value variant, the result is expected to be non-empty and in estimation mode
207
+ compact_theta_sketch result = a_not_b.compute(sketch, sketch);
208
+ REQUIRE_FALSE(result.is_empty());
209
+ REQUIRE(result.is_estimation_mode());
210
+ REQUIRE(result.get_estimate() == 0.0);
211
+
212
+ // ordered inputs (both arguments are passed through compact())
213
+ result = a_not_b.compute(sketch.compact(), sketch.compact());
214
+ REQUIRE_FALSE(result.is_empty());
215
+ REQUIRE(result.is_estimation_mode());
216
+ REQUIRE(result.get_estimate() == 0.0);
217
+ }
218
+
219
+ TEST_CASE("theta a-not-b: seed mismatch", "[theta_a_not_b]") { // compute() must throw std::invalid_argument when the operator is constructed with 123
220
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
221
+ sketch.update(1); // non-empty should not be ignored
222
+ theta_a_not_b a_not_b(123); // NOTE(review): per the test name, 123 is presumably a seed that differs from the sketch's default - confirm against the constructor
223
+ REQUIRE_THROWS_AS(a_not_b.compute(sketch, sketch), std::invalid_argument);
224
+ }
225
+
226
+ TEST_CASE("theta a-not-b: issue #152", "[theta_a_not_b]") { // B receives more values than A: A = 0..9999, B = 5000..29999; the estimate must be 5000 within a 3% margin
227
+ update_theta_sketch a = update_theta_sketch::builder().build();
228
+ int value = 0;
229
+ for (int i = 0; i < 10000; i++) a.update(value++); // feeds 0..9999 into A
230
+
231
+ update_theta_sketch b = update_theta_sketch::builder().build();
232
+ value = 5000;
233
+ for (int i = 0; i < 25000; i++) b.update(value++); // feeds 5000..29999 into B
234
+
235
+ theta_a_not_b a_not_b;
236
+
237
+ // unordered inputs
238
+ compact_theta_sketch result = a_not_b.compute(a, b);
239
+ REQUIRE_FALSE(result.is_empty());
240
+ REQUIRE(result.is_estimation_mode());
241
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03)); // absolute tolerance of 150
242
+
243
+ // ordered inputs (both sketches are passed through compact())
244
+ result = a_not_b.compute(a.compact(), b.compact());
245
+ REQUIRE_FALSE(result.is_empty());
246
+ REQUIRE(result.is_estimation_mode());
247
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03)); // absolute tolerance of 150
248
+ }
249
+
250
+ } /* namespace datasketches */
@@ -0,0 +1,224 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+
22
+ #include <theta_intersection_experimental.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ // These tests have been copied from the existing theta sketch implementation.
27
+
28
+ using update_theta_sketch = update_theta_sketch_experimental<>; // aliases let the test bodies below use the short names; each picks the template's default arguments
29
+ using compact_theta_sketch = compact_theta_sketch_experimental<>; // type returned by intersection.get_result() in the tests below
30
+ using theta_intersection = theta_intersection_experimental<>; // the set operation under test
31
+
32
+ TEST_CASE("theta intersection: invalid", "[theta_intersection]") { // an intersection that never received update() reports no result, and get_result() throws
33
+ theta_intersection intersection;
34
+ REQUIRE_FALSE(intersection.has_result()); // no update() call has been made yet
35
+ REQUIRE_THROWS_AS(intersection.get_result(), std::invalid_argument);
36
+ }
37
+
38
+ TEST_CASE("theta intersection: empty", "[theta_intersection]") { // intersecting with a sketch that received no updates yields an empty, exact-mode result, also after a second update
39
+ theta_intersection intersection;
40
+ update_theta_sketch sketch = update_theta_sketch::builder().build(); // never updated in this test
41
+ intersection.update(sketch);
42
+ compact_theta_sketch result = intersection.get_result();
43
+ REQUIRE(result.get_num_retained() == 0);
44
+ REQUIRE(result.is_empty());
45
+ REQUIRE_FALSE(result.is_estimation_mode());
46
+ REQUIRE(result.get_estimate() == 0.0);
47
+
48
+ intersection.update(sketch); // second update with the same sketch; the same four expectations are re-checked
49
+ result = intersection.get_result();
50
+ REQUIRE(result.get_num_retained() == 0);
51
+ REQUIRE(result.is_empty());
52
+ REQUIRE_FALSE(result.is_estimation_mode());
53
+ REQUIRE(result.get_estimate() == 0.0);
54
+ }
55
+
56
+ TEST_CASE("theta intersection: non empty no retained keys", "[theta_intersection]") { // a sketch built with p = 0.001 and one update: the result must be non-empty and in estimation mode, yet retain nothing
57
+ update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001).build();
58
+ sketch.update(1); // the assertions below expect this single update to leave no retained entry
59
+ theta_intersection intersection;
60
+ intersection.update(sketch);
61
+ compact_theta_sketch result = intersection.get_result();
62
+ REQUIRE(result.get_num_retained() == 0);
63
+ REQUIRE_FALSE(result.is_empty());
64
+ REQUIRE(result.is_estimation_mode());
65
+ REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10)); // theta is expected to match the p given to the builder
66
+ REQUIRE(result.get_estimate() == 0.0);
67
+
68
+ intersection.update(sketch); // second update with the same sketch; the same five expectations are re-checked
69
+ result = intersection.get_result();
70
+ REQUIRE(result.get_num_retained() == 0);
71
+ REQUIRE_FALSE(result.is_empty());
72
+ REQUIRE(result.is_estimation_mode());
73
+ REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
74
+ REQUIRE(result.get_estimate() == 0.0);
75
+ }
76
+
77
+ TEST_CASE("theta intersection: exact mode half overlap unordered", "[theta_intersection]") { // two 1000-value update sketches sharing 500 values: expects an exact-mode estimate of exactly 500
78
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
79
+ int value = 0;
80
+ for (int i = 0; i < 1000; i++) sketch1.update(value++); // sketch1 receives 0..999
81
+
82
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
83
+ value = 500;
84
+ for (int i = 0; i < 1000; i++) sketch2.update(value++); // sketch2 receives 500..1499, so 500..999 is common to both
85
+
86
+ theta_intersection intersection;
87
+ intersection.update(sketch1); // update sketches are passed directly
88
+ intersection.update(sketch2);
89
+ compact_theta_sketch result = intersection.get_result();
90
+ REQUIRE_FALSE(result.is_empty());
91
+ REQUIRE_FALSE(result.is_estimation_mode());
92
+ REQUIRE(result.get_estimate() == 500.0); // exact comparison, no tolerance
93
+ }
94
+
95
+ TEST_CASE("theta intersection: exact mode half overlap ordered", "[theta_intersection]") { // same data as the unordered half-overlap case, but compact() results are fed to the intersection
96
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
97
+ int value = 0;
98
+ for (int i = 0; i < 1000; i++) sketch1.update(value++); // sketch1 receives 0..999
99
+
100
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
101
+ value = 500;
102
+ for (int i = 0; i < 1000; i++) sketch2.update(value++); // sketch2 receives 500..1499, so 500..999 is common to both
103
+
104
+ theta_intersection intersection;
105
+ intersection.update(sketch1.compact()); // compact() result instead of the update sketch
106
+ intersection.update(sketch2.compact());
107
+ compact_theta_sketch result = intersection.get_result();
108
+ REQUIRE_FALSE(result.is_empty());
109
+ REQUIRE_FALSE(result.is_estimation_mode());
110
+ REQUIRE(result.get_estimate() == 500.0); // exact comparison, no tolerance
111
+ }
112
+
113
+ TEST_CASE("theta intersection: exact mode disjoint unordered", "[theta_intersection]") { // two 1000-value update sketches with no common value: expects an empty, exact-mode result
114
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
115
+ int value = 0;
116
+ for (int i = 0; i < 1000; i++) sketch1.update(value++); // sketch1 receives 0..999
117
+
118
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
119
+ for (int i = 0; i < 1000; i++) sketch2.update(value++); // value is not reset, so sketch2 receives 1000..1999
120
+
121
+ theta_intersection intersection;
122
+ intersection.update(sketch1); // update sketches are passed directly
123
+ intersection.update(sketch2);
124
+ compact_theta_sketch result = intersection.get_result();
125
+ REQUIRE(result.is_empty()); // unlike the estimation-mode disjoint tests, the result here must report empty
126
+ REQUIRE_FALSE(result.is_estimation_mode());
127
+ REQUIRE(result.get_estimate() == 0.0);
128
+ }
129
+
130
+ TEST_CASE("theta intersection: exact mode disjoint ordered", "[theta_intersection]") { // same data as the unordered exact disjoint case, but compact() results are fed to the intersection
131
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
132
+ int value = 0;
133
+ for (int i = 0; i < 1000; i++) sketch1.update(value++); // sketch1 receives 0..999
134
+
135
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
136
+ for (int i = 0; i < 1000; i++) sketch2.update(value++); // value is not reset, so sketch2 receives 1000..1999
137
+
138
+ theta_intersection intersection;
139
+ intersection.update(sketch1.compact()); // compact() result instead of the update sketch
140
+ intersection.update(sketch2.compact());
141
+ compact_theta_sketch result = intersection.get_result();
142
+ REQUIRE(result.is_empty()); // unlike the estimation-mode disjoint tests, the result here must report empty
143
+ REQUIRE_FALSE(result.is_estimation_mode());
144
+ REQUIRE(result.get_estimate() == 0.0);
145
+ }
146
+
147
+ TEST_CASE("theta intersection: estimation mode half overlap unordered", "[theta_intersection]") { // two 10000-value update sketches sharing 5000 values: expects an estimation-mode estimate of about 5000
148
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
149
+ int value = 0;
150
+ for (int i = 0; i < 10000; i++) sketch1.update(value++); // sketch1 receives 0..9999
151
+
152
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
153
+ value = 5000;
154
+ for (int i = 0; i < 10000; i++) sketch2.update(value++); // sketch2 receives 5000..14999, so 5000..9999 is common to both
155
+
156
+ theta_intersection intersection;
157
+ intersection.update(sketch1); // update sketches are passed directly
158
+ intersection.update(sketch2);
159
+ compact_theta_sketch result = intersection.get_result();
160
+ REQUIRE_FALSE(result.is_empty());
161
+ REQUIRE(result.is_estimation_mode());
162
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02)); // accepts an absolute error of up to 100 (2% of 5000)
163
+ }
164
+
165
+ TEST_CASE("theta intersection: estimation mode half overlap ordered", "[theta_intersection]") { // same data as the unordered estimation half-overlap case, but compact() results are fed to the intersection
166
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
167
+ int value = 0;
168
+ for (int i = 0; i < 10000; i++) sketch1.update(value++); // sketch1 receives 0..9999
169
+
170
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
171
+ value = 5000;
172
+ for (int i = 0; i < 10000; i++) sketch2.update(value++); // sketch2 receives 5000..14999, so 5000..9999 is common to both
173
+
174
+ theta_intersection intersection;
175
+ intersection.update(sketch1.compact()); // compact() result instead of the update sketch
176
+ intersection.update(sketch2.compact());
177
+ compact_theta_sketch result = intersection.get_result();
178
+ REQUIRE_FALSE(result.is_empty());
179
+ REQUIRE(result.is_estimation_mode());
180
+ REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02)); // accepts an absolute error of up to 100 (2% of 5000)
181
+ }
182
+
183
+ TEST_CASE("theta intersection: estimation mode disjoint unordered", "[theta_intersection]") { // two 10000-value update sketches with no common value: expects a non-empty, estimation-mode result with estimate exactly 0
184
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
185
+ int value = 0;
186
+ for (int i = 0; i < 10000; i++) sketch1.update(value++); // sketch1 receives 0..9999
187
+
188
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
189
+ for (int i = 0; i < 10000; i++) sketch2.update(value++); // value is not reset, so sketch2 receives 10000..19999
190
+
191
+ theta_intersection intersection;
192
+ intersection.update(sketch1); // update sketches are passed directly
193
+ intersection.update(sketch2);
194
+ compact_theta_sketch result = intersection.get_result();
195
+ REQUIRE_FALSE(result.is_empty()); // unlike the exact-mode disjoint tests, the result here must not report empty
196
+ REQUIRE(result.is_estimation_mode());
197
+ REQUIRE(result.get_estimate() == 0.0); // exact comparison, no tolerance
198
+ }
199
+
200
+ TEST_CASE("theta intersection: estimation mode disjoint ordered", "[theta_intersection]") { // same data as the unordered estimation disjoint case, but compact() results are fed to the intersection
201
+ update_theta_sketch sketch1 = update_theta_sketch::builder().build();
202
+ int value = 0;
203
+ for (int i = 0; i < 10000; i++) sketch1.update(value++); // sketch1 receives 0..9999
204
+
205
+ update_theta_sketch sketch2 = update_theta_sketch::builder().build();
206
+ for (int i = 0; i < 10000; i++) sketch2.update(value++); // value is not reset, so sketch2 receives 10000..19999
207
+
208
+ theta_intersection intersection;
209
+ intersection.update(sketch1.compact()); // compact() result instead of the update sketch
210
+ intersection.update(sketch2.compact());
211
+ compact_theta_sketch result = intersection.get_result();
212
+ REQUIRE_FALSE(result.is_empty()); // unlike the exact-mode disjoint tests, the result here must not report empty
213
+ REQUIRE(result.is_estimation_mode());
214
+ REQUIRE(result.get_estimate() == 0.0); // exact comparison, no tolerance
215
+ }
216
+
217
+ TEST_CASE("theta intersection: seed mismatch", "[theta_intersection]") { // update() must throw std::invalid_argument when the intersection is built with 123 and the sketch with builder defaults
218
+ update_theta_sketch sketch = update_theta_sketch::builder().build();
219
+ sketch.update(1); // non-empty should not be ignored
220
+ theta_intersection intersection(123); // NOTE(review): 123 is presumably a hash seed differing from the builder default (per the test name) - confirm against the theta_intersection constructor
221
+ REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
222
+ }
223
+
224
+ } /* namespace datasketches */