datasketches 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (247) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE +310 -0
  4. data/NOTICE +11 -0
  5. data/README.md +126 -0
  6. data/ext/datasketches/cpc_wrapper.cpp +50 -0
  7. data/ext/datasketches/ext.cpp +12 -0
  8. data/ext/datasketches/extconf.rb +11 -0
  9. data/ext/datasketches/hll_wrapper.cpp +69 -0
  10. data/lib/datasketches.rb +9 -0
  11. data/lib/datasketches/version.rb +3 -0
  12. data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
  13. data/vendor/datasketches-cpp/LICENSE +311 -0
  14. data/vendor/datasketches-cpp/MANIFEST.in +19 -0
  15. data/vendor/datasketches-cpp/NOTICE +11 -0
  16. data/vendor/datasketches-cpp/README.md +42 -0
  17. data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
  18. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
  19. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
  20. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
  21. data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
  24. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
  25. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
  26. data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
  27. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
  28. data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
  29. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
  30. data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
  31. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
  32. data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
  33. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
  34. data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
  35. data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
  36. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
  37. data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
  38. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
  39. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
  40. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
  41. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
  42. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
  43. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
  44. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
  45. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
  46. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
  47. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
  48. data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
  49. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
  50. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
  51. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
  52. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
  53. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
  54. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
  55. data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
  59. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
  60. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
  63. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  64. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  65. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  66. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
  67. data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
  68. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
  69. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
  70. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
  71. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
  72. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
  73. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
  74. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
  75. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
  76. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
  77. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
  78. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
  79. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
  80. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
  81. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
  82. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
  83. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
  84. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
  85. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
  86. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
  87. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
  88. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
  89. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
  90. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
  91. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
  92. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
  93. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
  94. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
  95. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
  96. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
  97. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
  98. data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
  99. data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
  100. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
  101. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
  102. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
  103. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
  104. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
  105. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
  106. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
  107. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
  108. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
  109. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
  110. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
  111. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  112. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  113. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  114. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  115. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  116. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  117. data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
  118. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
  119. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
  120. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
  121. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
  122. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
  123. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
  124. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
  125. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
  126. data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
  127. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  128. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
  129. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
  130. data/vendor/datasketches-cpp/pyproject.toml +17 -0
  131. data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
  132. data/vendor/datasketches-cpp/python/README.md +78 -0
  133. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
  134. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
  135. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
  136. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
  137. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
  138. data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
  139. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
  140. data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
  141. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
  142. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
  143. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
  144. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
  145. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
  146. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
  147. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
  148. data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
  149. data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
  150. data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
  151. data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
  152. data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
  153. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
  154. data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
  155. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
  156. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
  157. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
  158. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
  159. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
  160. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
  161. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
  162. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
  163. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
  164. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  165. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  166. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  167. data/vendor/datasketches-cpp/setup.py +94 -0
  168. data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
  169. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
  170. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
  171. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
  172. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
  173. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
  174. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
  175. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
  176. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
  177. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
  178. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
  179. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  180. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  181. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  182. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
  183. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
  184. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
  185. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
  188. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
  189. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
  190. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
  191. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
  192. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
  193. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
  194. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
  195. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
  196. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
  197. data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
  198. data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
  199. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
  200. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
  201. data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
  202. data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
  203. data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
  204. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
  205. data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
  206. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
  207. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
  208. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
  209. data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
  210. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
  211. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
  212. data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
  213. data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
  214. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
  215. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
  216. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
  217. data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
  218. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
  219. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
  220. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
  221. data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
  222. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
  223. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
  224. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
  225. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
  226. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
  227. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
  228. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  230. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  231. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
  232. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
  233. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
  234. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
  238. data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
  239. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
  240. data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
  241. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
  242. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
  243. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
  244. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
  245. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
  246. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
  247. metadata +302 -0
@@ -0,0 +1,109 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef TUPLE_UNION_HPP_
21
+ #define TUPLE_UNION_HPP_
22
+
23
+ #include "tuple_sketch.hpp"
24
+ #include "theta_union_base.hpp"
25
+
26
+ namespace datasketches {
27
+
28
+ // default policy for Summary types that define operator+=
29
+ template<typename Summary>
30
+ struct default_union_policy {
31
+ void operator()(Summary& summary, const Summary& other) const {
32
+ summary += other;
33
+ }
34
+ };
35
+
36
+ template<
37
+ typename Summary,
38
+ typename Policy = default_union_policy<Summary>,
39
+ typename Allocator = std::allocator<Summary>
40
+ >
41
+ class tuple_union {
42
+ public:
43
+ using Entry = std::pair<uint64_t, Summary>;
44
+ using ExtractKey = pair_extract_key<uint64_t, Summary>;
45
+ using Sketch = tuple_sketch<Summary, Allocator>;
46
+ using CompactSketch = compact_tuple_sketch<Summary, Allocator>;
47
+ using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
48
+ using resize_factor = theta_constants::resize_factor;
49
+
50
+ // reformulate the external policy that operates on Summary
51
+ // in terms of operations on Entry
52
+ struct internal_policy {
53
+ internal_policy(const Policy& policy): policy_(policy) {}
54
+ void operator()(Entry& internal_entry, const Entry& incoming_entry) const {
55
+ policy_(internal_entry.second, incoming_entry.second);
56
+ }
57
+ void operator()(Entry& internal_entry, Entry&& incoming_entry) const {
58
+ policy_(internal_entry.second, std::move(incoming_entry.second));
59
+ }
60
+ const Policy& get_policy() const { return policy_; }
61
+ Policy policy_;
62
+ };
63
+
64
+ using State = theta_union_base<Entry, ExtractKey, internal_policy, Sketch, CompactSketch, AllocEntry>;
65
+
66
+ // No constructor here. Use builder instead.
67
+ class builder;
68
+
69
+ /**
70
+ * Updates the union with the given sketch.
71
+ * @param sketch to update the union with
72
+ */
73
+ template<typename FwdSketch>
74
+ void update(FwdSketch&& sketch);
75
+
76
+ /**
77
+ * This method produces a copy of the current state of the union as a compact sketch.
78
+ * @param ordered optional flag to specify if ordered sketch should be produced
79
+ * @return the result of the union
80
+ */
81
+ CompactSketch get_result(bool ordered = true) const;
82
+
83
+ protected:
84
+ State state_;
85
+
86
+ // for builder
87
+ tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
88
+ };
89
+
90
+ template<typename S, typename P, typename A>
91
+ class tuple_union<S, P, A>::builder: public tuple_base_builder<builder, P, A> {
92
+ public:
93
+ /**
94
+ * Creates an instance of the builder with default parameters.
95
+ */
96
+ builder(const P& policy = P(), const A& allocator = A());
97
+
98
+ /**
99
+ * Creates an instance of the union using the parameters configured on this builder.
100
+ * @return an instance of the union
101
+ */
102
+ tuple_union build() const;
103
+ };
104
+
105
+ } /* namespace datasketches */
106
+
107
+ #include "tuple_union_impl.hpp"
108
+
109
+ #endif
@@ -0,0 +1,47 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ namespace datasketches {
21
+
22
+ template<typename S, typename P, typename A>
23
+ tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
24
+ state_(lg_cur_size, lg_nom_size, rf, theta, seed, internal_policy(policy), allocator)
25
+ {}
26
+
27
+ template<typename S, typename P, typename A>
28
+ template<typename SS>
29
+ void tuple_union<S, P, A>::update(SS&& sketch) {
30
+ state_.update(std::forward<SS>(sketch));
31
+ }
32
+
33
+ template<typename S, typename P, typename A>
34
+ auto tuple_union<S, P, A>::get_result(bool ordered) const -> CompactSketch {
35
+ return state_.get_result(ordered);
36
+ }
37
+
38
+ template<typename S, typename P, typename A>
39
+ tuple_union<S, P, A>::builder::builder(const P& policy, const A& allocator):
40
+ tuple_base_builder<builder, P, A>(policy, allocator) {}
41
+
42
+ template<typename S, typename P, typename A>
43
+ auto tuple_union<S, P, A>::builder::build() const -> tuple_union {
44
+ return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
45
+ }
46
+
47
+ } /* namespace datasketches */
@@ -0,0 +1,53 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ add_executable(tuple_test)
19
+
20
+ target_link_libraries(tuple_test tuple common_test)
21
+
22
+ set_target_properties(tuple_test PROPERTIES
23
+ CXX_STANDARD 11
24
+ CXX_STANDARD_REQUIRED YES
25
+ )
26
+
27
+ file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" THETA_TEST_BINARY_PATH)
28
+ string(APPEND THETA_TEST_BINARY_PATH "/")
29
+ target_compile_definitions(tuple_test
30
+ PRIVATE
31
+ TEST_BINARY_INPUT_PATH="${THETA_TEST_BINARY_PATH}"
32
+ )
33
+
34
+ add_test(
35
+ NAME tuple_test
36
+ COMMAND tuple_test
37
+ )
38
+
39
+ target_sources(tuple_test
40
+ PRIVATE
41
+ tuple_sketch_test.cpp
42
+ tuple_sketch_allocation_test.cpp
43
+ tuple_union_test.cpp
44
+ tuple_intersection_test.cpp
45
+ tuple_a_not_b_test.cpp
46
+ array_of_doubles_sketch_test.cpp
47
+ theta_sketch_experimental_test.cpp
48
+ theta_union_experimental_test.cpp
49
+ theta_intersection_experimental_test.cpp
50
+ theta_a_not_b_experimental_test.cpp
51
+ theta_jaccard_similarity_test.cpp
52
+ tuple_jaccard_similarity_test.cpp
53
+ )
@@ -0,0 +1 @@
1
+  ̓�������
@@ -0,0 +1 @@
1
+  ̓�������
@@ -0,0 +1,298 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <iostream>
21
+ #include <fstream>
22
+ #include <sstream>
23
+ #include <array>
24
+
25
+ #include <catch.hpp>
26
+ #include <array_of_doubles_sketch.hpp>
27
+ #include <array_of_doubles_union.hpp>
28
+ #include <array_of_doubles_intersection.hpp>
29
+ #include <array_of_doubles_a_not_b.hpp>
30
+
31
+ namespace datasketches {
32
+
33
+ #ifdef TEST_BINARY_INPUT_PATH
34
+ const std::string inputPath = TEST_BINARY_INPUT_PATH; // prefix prepended to the *.sk fixture file names; taken from the macro when it is defined
35
+ #else
36
+ const std::string inputPath = "test/"; // fallback prefix used when TEST_BINARY_INPUT_PATH is not defined
37
+ #endif
38
+
39
+ TEST_CASE("aod sketch: serialization compatibility with java - empty", "[tuple_sketch]") { // compares a never-updated sketch built here with the empty-sketch fixture read from disk
40
+ auto update_sketch = update_array_of_doubles_sketch::builder().build();
41
+ REQUIRE(update_sketch.is_empty());
42
+ REQUIRE(update_sketch.get_num_retained() == 0);
43
+ auto compact_sketch = update_sketch.compact();
44
+
45
+ // read binary sketch from Java; the exception mask set below makes a missing or unreadable fixture throw instead of failing silently
46
+ std::ifstream is;
47
+ is.exceptions(std::ios::failbit | std::ios::badbit);
48
+ is.open(inputPath + "aod_1_compact_empty_from_java.sk", std::ios::binary);
49
+ auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
50
+ REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
51
+ REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
52
+ REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
53
+ REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10)); // argument is presumably the number of standard deviations - confirm against the sketch API
54
+ REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
55
+ }
56
+
57
+ TEST_CASE("aod sketch: serialization compatibility with java - empty configured for three values", "[tuple_sketch]") { // same as the empty case, but the sketch is built with builder(3) and the value count is compared too
58
+ auto update_sketch = update_array_of_doubles_sketch::builder(3).build();
59
+ REQUIRE(update_sketch.is_empty());
60
+ REQUIRE(update_sketch.get_num_retained() == 0);
61
+ REQUIRE(update_sketch.get_num_values() == 3); // the builder argument is reflected in get_num_values()
62
+ auto compact_sketch = update_sketch.compact();
63
+
64
+ // read binary sketch from Java; the exception mask set below makes a missing or unreadable fixture throw instead of failing silently
65
+ std::ifstream is;
66
+ is.exceptions(std::ios::failbit | std::ios::badbit);
67
+ is.open(inputPath + "aod_3_compact_empty_from_java.sk", std::ios::binary);
68
+ auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
69
+ REQUIRE(compact_sketch.get_num_values() == compact_sketch_from_java.get_num_values());
70
+ REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
71
+ REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
72
+ REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
73
+ REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
74
+ REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
75
+ }
76
+
77
+ TEST_CASE("aod sketch: serialization compatibility with java - non-empty no entries", "[tuple_sketch]") { // covers a sketch that has seen an update yet retains nothing, compared with the matching fixture
78
+ auto update_sketch = update_array_of_doubles_sketch::builder().set_p(0.01).build(); // presumably p is a sampling probability - confirm against the builder API
79
+ std::vector<double> a = {1};
80
+ update_sketch.update(1, a);
81
+ REQUIRE_FALSE(update_sketch.is_empty()); // the update was seen ...
82
+ REQUIRE(update_sketch.get_num_retained() == 0); // ... but no entry was kept
83
+ auto compact_sketch = update_sketch.compact();
84
+
85
+ // read binary sketch from Java; the exception mask set below makes a missing or unreadable fixture throw instead of failing silently
86
+ std::ifstream is;
87
+ is.exceptions(std::ios::failbit | std::ios::badbit);
88
+ is.open(inputPath + "aod_1_compact_non_empty_no_entries_from_java.sk", std::ios::binary);
89
+ auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
90
+ REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
91
+ REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
92
+ REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
93
+ REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
94
+ REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
95
+ }
96
+
97
+ TEST_CASE("aod sketch: serialization compatibility with java - estimation mode", "[tuple_sketch]") { // compares summary statistics and every retained entry with the estimation-mode fixture
98
+ auto update_sketch = update_array_of_doubles_sketch::builder().build();
99
+ std::vector<double> a = {1};
100
+ for (int i = 0; i < 8192; ++i) update_sketch.update(i, a); // keys 0..8192-1, each with the same one-element value array; NOTE(review): estimation mode is implied by the test name but not asserted here
101
+ auto compact_sketch = update_sketch.compact();
102
+
103
+ // read binary sketch from Java; the exception mask set below makes a missing or unreadable fixture throw instead of failing silently
104
+ std::ifstream is;
105
+ is.exceptions(std::ios::failbit | std::ios::badbit);
106
+ is.open(inputPath + "aod_1_compact_estimation_from_java.sk", std::ios::binary);
107
+ auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
108
+ REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained()); // equal counts also keep the entry-by-entry loop below from running past the other sketch
109
+ REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
110
+ REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
111
+ REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10)); // bounds are checked for arguments 1, 2 and 3 (presumably numbers of standard deviations - confirm)
112
+ REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
113
+ REQUIRE(compact_sketch.get_lower_bound(2) == Approx(compact_sketch_from_java.get_lower_bound(2)).margin(1e-10));
114
+ REQUIRE(compact_sketch.get_upper_bound(2) == Approx(compact_sketch_from_java.get_upper_bound(2)).margin(1e-10));
115
+ REQUIRE(compact_sketch.get_lower_bound(3) == Approx(compact_sketch_from_java.get_lower_bound(3)).margin(1e-10));
116
+ REQUIRE(compact_sketch.get_upper_bound(3) == Approx(compact_sketch_from_java.get_upper_bound(3)).margin(1e-10));
117
+
118
+ // sketch from Java is not ordered
119
+ // transform it to ordered so that iteration sequence would match exactly
120
+ compact_array_of_doubles_sketch ordered_sketch_from_java(compact_sketch_from_java, true);
121
+ auto it = ordered_sketch_from_java.begin();
122
+ for (const auto& entry: compact_sketch) {
123
+ REQUIRE(entry == *it); // whole entries are compared with operator==
124
+ ++it;
125
+ }
126
+ }
127
+
128
+ TEST_CASE("aod sketch: serialization compatibility with java - exact mode with two values", "[tuple_sketch]") { // compares a two-value sketch in exact mode with the matching fixture, entry by entry
129
+ auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
130
+ std::vector<double> a = {1, 2};
131
+ for (int i = 0; i < 1000; ++i) update_sketch.update(i, a.data()); // pass vector as pointer
132
+ auto compact_sketch = update_sketch.compact();
133
+ REQUIRE_FALSE(compact_sketch.is_estimation_mode()); // 1000 updates must leave the sketch in exact mode
134
+
135
+ // read binary sketch from Java; the exception mask set below makes a missing or unreadable fixture throw instead of failing silently
136
+ std::ifstream is;
137
+ is.exceptions(std::ios::failbit | std::ios::badbit);
138
+ is.open(inputPath + "aod_2_compact_exact_from_java.sk", std::ios::binary);
139
+ auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
140
+ REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained()); // equal counts also keep the entry-by-entry loop below from running past the other sketch
141
+ REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
142
+ REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
143
+ REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
144
+ REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
145
+ REQUIRE(compact_sketch.get_lower_bound(2) == Approx(compact_sketch_from_java.get_lower_bound(2)).margin(1e-10));
146
+ REQUIRE(compact_sketch.get_upper_bound(2) == Approx(compact_sketch_from_java.get_upper_bound(2)).margin(1e-10));
147
+ REQUIRE(compact_sketch.get_lower_bound(3) == Approx(compact_sketch_from_java.get_lower_bound(3)).margin(1e-10));
148
+ REQUIRE(compact_sketch.get_upper_bound(3) == Approx(compact_sketch_from_java.get_upper_bound(3)).margin(1e-10));
149
+
150
+ // sketch from Java is not ordered
151
+ // transform it to ordered so that iteration sequence would match exactly
152
+ compact_array_of_doubles_sketch ordered_sketch_from_java(compact_sketch_from_java, true);
153
+ auto it = ordered_sketch_from_java.begin();
154
+ for (const auto& entry: compact_sketch) {
155
+ REQUIRE(entry.first == (*it).first); // entry.first is compared first, then both elements of entry.second
156
+ REQUIRE(entry.second.size() == 2);
157
+ REQUIRE(entry.second[0] == (*it).second[0]);
158
+ REQUIRE(entry.second[1] == (*it).second[1]);
159
+ ++it;
160
+ }
161
+ }
162
+
163
+ TEST_CASE("aod sketch: stream serialize deserialize - estimation mode", "[tuple_sketch]") { // round-trips a compact sketch through a std::stringstream and checks that nothing changed
164
+ auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
165
+ std::vector<double> a = {1, 2};
166
+ for (int i = 0; i < 8192; ++i) update_sketch.update(i, a); // NOTE(review): estimation mode is implied by the test name but not asserted here
167
+ auto compact_sketch = update_sketch.compact();
168
+
169
+ std::stringstream ss;
170
+ ss.exceptions(std::ios::failbit | std::ios::badbit); // any stream failure during the round trip throws
171
+ compact_sketch.serialize(ss);
172
+ auto deserialized_sketch = compact_array_of_doubles_sketch::deserialize(ss);
173
+ REQUIRE(compact_sketch.get_num_retained() == deserialized_sketch.get_num_retained()); // equal counts also keep the entry-by-entry loop below from running past the other sketch
174
+ REQUIRE(compact_sketch.get_theta() == Approx(deserialized_sketch.get_theta()).margin(1e-10));
175
+ REQUIRE(compact_sketch.get_estimate() == Approx(deserialized_sketch.get_estimate()).margin(1e-10));
176
+ REQUIRE(compact_sketch.get_lower_bound(1) == Approx(deserialized_sketch.get_lower_bound(1)).margin(1e-10));
177
+ REQUIRE(compact_sketch.get_upper_bound(1) == Approx(deserialized_sketch.get_upper_bound(1)).margin(1e-10));
178
+ REQUIRE(compact_sketch.get_lower_bound(2) == Approx(deserialized_sketch.get_lower_bound(2)).margin(1e-10));
179
+ REQUIRE(compact_sketch.get_upper_bound(2) == Approx(deserialized_sketch.get_upper_bound(2)).margin(1e-10));
180
+ REQUIRE(compact_sketch.get_lower_bound(3) == Approx(deserialized_sketch.get_lower_bound(3)).margin(1e-10));
181
+ REQUIRE(compact_sketch.get_upper_bound(3) == Approx(deserialized_sketch.get_upper_bound(3)).margin(1e-10));
182
+ // sketches must be ordered and the iteration sequence must match exactly
183
+ auto it = deserialized_sketch.begin();
184
+ for (const auto& entry: compact_sketch) {
185
+ REQUIRE(entry.first == (*it).first); // entry.first is compared first, then both elements of entry.second
186
+ REQUIRE(entry.second.size() == 2);
187
+ REQUIRE(entry.second[0] == (*it).second[0]);
188
+ REQUIRE(entry.second[1] == (*it).second[1]);
189
+ ++it;
190
+ }
191
+ }
192
+
193
+ TEST_CASE("aod sketch: bytes to stream serialize deserialize - estimation mode", "[tuple_sketch]") { // serializes to a byte container, then reads those bytes back through the stream-based deserialize
194
+ auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
195
+ std::vector<double> a = {1, 2};
196
+ for (int i = 0; i < 8192; ++i) update_sketch.update(i, a); // NOTE(review): estimation mode is implied by the test name but not asserted here
197
+ auto compact_sketch = update_sketch.compact();
198
+
199
+ auto bytes = compact_sketch.serialize();
200
+ std::stringstream ss;
201
+ ss.exceptions(std::ios::failbit | std::ios::badbit); // any stream failure while writing or deserializing throws
202
+ ss.write(reinterpret_cast<const char*>(bytes.data()), bytes.size()); // copy the serialized bytes into the stream unchanged
203
+ auto deserialized_sketch = compact_array_of_doubles_sketch::deserialize(ss);
204
+ REQUIRE(compact_sketch.get_num_retained() == deserialized_sketch.get_num_retained()); // equal counts also keep the entry-by-entry loop below from running past the other sketch
205
+ REQUIRE(compact_sketch.get_theta() == Approx(deserialized_sketch.get_theta()).margin(1e-10));
206
+ REQUIRE(compact_sketch.get_estimate() == Approx(deserialized_sketch.get_estimate()).margin(1e-10));
207
+ REQUIRE(compact_sketch.get_lower_bound(1) == Approx(deserialized_sketch.get_lower_bound(1)).margin(1e-10));
208
+ REQUIRE(compact_sketch.get_upper_bound(1) == Approx(deserialized_sketch.get_upper_bound(1)).margin(1e-10));
209
+ REQUIRE(compact_sketch.get_lower_bound(2) == Approx(deserialized_sketch.get_lower_bound(2)).margin(1e-10));
210
+ REQUIRE(compact_sketch.get_upper_bound(2) == Approx(deserialized_sketch.get_upper_bound(2)).margin(1e-10));
211
+ REQUIRE(compact_sketch.get_lower_bound(3) == Approx(deserialized_sketch.get_lower_bound(3)).margin(1e-10));
212
+ REQUIRE(compact_sketch.get_upper_bound(3) == Approx(deserialized_sketch.get_upper_bound(3)).margin(1e-10));
213
+ // sketches must be ordered and the iteration sequence must match exactly
214
+ auto it = deserialized_sketch.begin();
215
+ for (const auto& entry: compact_sketch) {
216
+ REQUIRE(entry.first == (*it).first); // entry.first is compared first, then both elements of entry.second
217
+ REQUIRE(entry.second.size() == 2);
218
+ REQUIRE(entry.second[0] == (*it).second[0]);
219
+ REQUIRE(entry.second[1] == (*it).second[1]);
220
+ ++it;
221
+ }
222
+ }
223
+
224
+ TEST_CASE("aod sketch: bytes serialize deserialize - estimation mode", "[tuple_sketch]") { // round-trips a compact sketch through the byte-based serialize/deserialize pair, with no stream involved
225
+ auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
226
+ std::vector<double> a = {1, 2};
227
+ for (int i = 0; i < 8192; ++i) update_sketch.update(i, a); // NOTE(review): estimation mode is implied by the test name but not asserted here
228
+ auto compact_sketch = update_sketch.compact();
229
+
230
+ auto bytes = compact_sketch.serialize();
231
+ auto deserialized_sketch = compact_array_of_doubles_sketch::deserialize(bytes.data(), bytes.size());
232
+ REQUIRE(compact_sketch.get_num_retained() == deserialized_sketch.get_num_retained()); // equal counts also keep the entry-by-entry loop below from running past the other sketch
233
+ REQUIRE(compact_sketch.get_theta() == Approx(deserialized_sketch.get_theta()).margin(1e-10));
234
+ REQUIRE(compact_sketch.get_estimate() == Approx(deserialized_sketch.get_estimate()).margin(1e-10));
235
+ REQUIRE(compact_sketch.get_lower_bound(1) == Approx(deserialized_sketch.get_lower_bound(1)).margin(1e-10));
236
+ REQUIRE(compact_sketch.get_upper_bound(1) == Approx(deserialized_sketch.get_upper_bound(1)).margin(1e-10));
237
+ REQUIRE(compact_sketch.get_lower_bound(2) == Approx(deserialized_sketch.get_lower_bound(2)).margin(1e-10));
238
+ REQUIRE(compact_sketch.get_upper_bound(2) == Approx(deserialized_sketch.get_upper_bound(2)).margin(1e-10));
239
+ REQUIRE(compact_sketch.get_lower_bound(3) == Approx(deserialized_sketch.get_lower_bound(3)).margin(1e-10));
240
+ REQUIRE(compact_sketch.get_upper_bound(3) == Approx(deserialized_sketch.get_upper_bound(3)).margin(1e-10));
241
+ // sketches must be ordered and the iteration sequence must match exactly
242
+ auto it = deserialized_sketch.begin();
243
+ for (const auto& entry: compact_sketch) {
244
+ REQUIRE(entry.first == (*it).first); // entry.first is compared first, then both elements of entry.second
245
+ REQUIRE(entry.second.size() == 2);
246
+ REQUIRE(entry.second[0] == (*it).second[0]);
247
+ REQUIRE(entry.second[1] == (*it).second[1]);
248
+ ++it;
249
+ }
250
+ }
251
+
252
+ TEST_CASE("aod union: half overlap", "[tuple_sketch]") { // unions two sketches whose key ranges share 500 keys and checks the estimate of the result
253
+ std::vector<double> a = {1};
254
+
255
+ auto update_sketch1 = update_array_of_doubles_sketch::builder().build();
256
+ for (int i = 0; i < 1000; ++i) update_sketch1.update(i, a); // keys 0..999
257
+
258
+ auto update_sketch2 = update_array_of_doubles_sketch::builder().build();
259
+ for (int i = 500; i < 1500; ++i) update_sketch2.update(i, a); // keys 500..1499, so 500..999 appear in both sketches
260
+
261
+ auto u = array_of_doubles_union::builder().build();
262
+ u.update(update_sketch1);
263
+ u.update(update_sketch2);
264
+ auto result = u.get_result();
265
+ REQUIRE(result.get_estimate() == Approx(1500).margin(0.01)); // 1500 distinct keys overall; the tight margin presumably relies on the sketches staying in exact mode - confirm
266
+ }
267
+
268
+ TEST_CASE("aod intersection: half overlap", "[tuple_sketch]") { // intersects two sketches whose key ranges share 500 keys and checks the estimate of the result
269
+ std::vector<double> a = {1};
270
+
271
+ auto update_sketch1 = update_array_of_doubles_sketch::builder().build();
272
+ for (int i = 0; i < 1000; ++i) update_sketch1.update(i, a); // keys 0..999
273
+
274
+ auto update_sketch2 = update_array_of_doubles_sketch::builder().build();
275
+ for (int i = 500; i < 1500; ++i) update_sketch2.update(i, a); // keys 500..1499, so 500..999 appear in both sketches
276
+
277
+ array_of_doubles_intersection<array_of_doubles_union_policy> intersection; // NOTE(review): the union policy type is used as the intersection's policy argument - presumably intentional, confirm
278
+ intersection.update(update_sketch1);
279
+ intersection.update(update_sketch2);
280
+ auto result = intersection.get_result();
281
+ REQUIRE(result.get_estimate() == Approx(500).margin(0.01)); // 500 keys are common to both inputs
282
+ }
283
+
284
+ TEST_CASE("aod a-not-b: half overlap", "[tuple_sketch]") { // computes sketch1 minus sketch2 for key ranges that share 500 keys and checks the estimate of the result
285
+ double a[1] = {1}; // values are supplied as a raw array here rather than a std::vector
286
+
287
+ auto update_sketch1 = update_array_of_doubles_sketch::builder().build();
288
+ for (int i = 0; i < 1000; ++i) update_sketch1.update(i, a); // keys 0..999
289
+
290
+ auto update_sketch2 = update_array_of_doubles_sketch::builder().build();
291
+ for (int i = 500; i < 1500; ++i) update_sketch2.update(i, a); // keys 500..1499, so 500..999 appear in both sketches
292
+
293
+ array_of_doubles_a_not_b a_not_b;
294
+ auto result = a_not_b.compute(update_sketch1, update_sketch2);
295
+ REQUIRE(result.get_estimate() == Approx(500).margin(0.01)); // keys 0..499 are in the first sketch only
296
+ }
297
+
298
+ } /* namespace datasketches */