datasketches 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (245) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +3 -3
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +23 -20
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/README.md +1 -3
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +15 -10
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
  21. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  23. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  24. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  25. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +2 -2
  26. data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
  27. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  28. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  29. data/vendor/datasketches-cpp/{python/src/__init__.py → count/CMakeLists.txt} +25 -1
  30. data/vendor/datasketches-cpp/count/include/count_min.hpp +405 -0
  31. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +497 -0
  32. data/vendor/datasketches-cpp/{MANIFEST.in → count/test/CMakeLists.txt} +23 -20
  33. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
  34. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +303 -0
  35. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  36. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  37. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  38. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
  39. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  40. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +14 -11
  41. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  42. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  43. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
  44. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  45. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  46. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  47. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  48. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_serialize_for_java.cpp +38 -0
  49. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  50. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  51. data/vendor/datasketches-cpp/{tox.ini → density/CMakeLists.txt} +24 -8
  52. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +256 -0
  53. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
  54. data/vendor/datasketches-cpp/{python/datasketches/__init__.py → density/test/CMakeLists.txt} +15 -3
  55. data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
  56. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  57. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  58. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
  59. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  60. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  61. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  62. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  63. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  64. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +94 -61
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +20 -8
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
  76. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
  77. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -18
  78. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
  79. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
  80. data/vendor/datasketches-cpp/hll/include/hll.hpp +79 -65
  81. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  82. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
  83. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  84. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  85. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  86. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
  87. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  88. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +79 -53
  89. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +61 -132
  90. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  91. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  92. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  93. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +5 -40
  94. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +76 -54
  95. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +66 -136
  96. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  97. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  98. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  99. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +15 -39
  100. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  101. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -4
  102. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +105 -26
  103. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +50 -111
  104. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  105. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  106. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  107. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  108. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  109. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  110. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  111. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  112. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  113. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +89 -32
  114. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +33 -19
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +13 -10
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +23 -19
  117. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  118. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  119. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  120. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  121. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  122. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  123. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -51
  124. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  125. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  126. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -20
  127. data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
  128. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
  129. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  130. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  131. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
  132. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
  133. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  134. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  135. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  137. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  140. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  141. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  142. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +180 -33
  143. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
  144. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  145. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +10 -10
  146. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  147. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +21 -6
  148. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +13 -3
  149. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +15 -1
  150. data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
  151. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  152. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  153. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +39 -188
  154. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
  155. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  157. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  158. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  159. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  160. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection_impl.hpp +31 -0
  161. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  162. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  163. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  164. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  165. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  166. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  167. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  168. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  169. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
  170. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  171. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  172. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  173. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  174. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  175. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  176. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  177. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  178. data/vendor/datasketches-cpp/{python/src/datasketches.cpp → tuple/test/tuple_sketch_serialize_for_java.cpp} +16 -30
  179. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  180. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  181. metadata +61 -79
  182. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  183. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  184. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  185. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  186. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  187. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  188. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  189. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  190. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  191. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  192. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  193. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -81
  194. data/vendor/datasketches-cpp/python/README.md +0 -85
  195. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -104
  196. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  197. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  198. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  199. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  200. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  201. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  202. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  203. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -90
  204. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -128
  205. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -134
  206. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -210
  207. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  208. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -111
  209. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -204
  210. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -215
  211. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -172
  212. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  213. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  214. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  215. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  216. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -110
  217. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -130
  218. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -125
  219. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -126
  220. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -126
  221. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -146
  222. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  223. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -125
  224. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  225. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  230. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  231. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  232. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  233. data/vendor/datasketches-cpp/setup.py +0 -110
  234. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  238. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  239. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  240. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  241. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  242. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  243. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  244. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  245. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -167,20 +167,6 @@ TEST_CASE("theta sketch: estimation", "[theta_sketch]") {
167
167
  REQUIRE(compact_sketch.get_upper_bound(1) > n);
168
168
  }
169
169
 
170
- TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]") {
171
- std::ifstream is;
172
- is.exceptions(std::ios::failbit | std::ios::badbit);
173
- is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary);
174
- auto sketch = compact_theta_sketch::deserialize(is);
175
- REQUIRE(sketch.is_empty());
176
- REQUIRE_FALSE(sketch.is_estimation_mode());
177
- REQUIRE(sketch.get_num_retained() == 0);
178
- REQUIRE(sketch.get_theta() == 1.0);
179
- REQUIRE(sketch.get_estimate() == 0.0);
180
- REQUIRE(sketch.get_lower_bound(1) == 0.0);
181
- REQUIRE(sketch.get_upper_bound(1) == 0.0);
182
- }
183
-
184
170
  TEST_CASE("theta sketch: deserialize compact v1 empty from java", "[theta_sketch]") {
185
171
  std::ifstream is;
186
172
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -209,88 +195,6 @@ TEST_CASE("theta sketch: deserialize compact v2 empty from java", "[theta_sketch
209
195
  REQUIRE(sketch.get_upper_bound(1) == 0.0);
210
196
  }
211
197
 
212
- TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
213
- std::ifstream is;
214
- is.exceptions(std::ios::failbit | std::ios::badbit);
215
- is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary);
216
- auto sketch = compact_theta_sketch::deserialize(is);
217
- REQUIRE_FALSE(sketch.is_empty());
218
- REQUIRE_FALSE(sketch.is_estimation_mode());
219
- REQUIRE(sketch.get_num_retained() == 1);
220
- REQUIRE(sketch.get_theta() == 1.0);
221
- REQUIRE(sketch.get_estimate() == 1.0);
222
- REQUIRE(sketch.get_lower_bound(1) == 1.0);
223
- REQUIRE(sketch.get_upper_bound(1) == 1.0);
224
- }
225
-
226
- TEST_CASE("theta sketch: deserialize compact exact from java", "[theta_sketch]") {
227
- std::ifstream is;
228
- is.exceptions(std::ios::failbit | std::ios::badbit);
229
- is.open(inputPath + "theta_compact_exact_from_java.sk", std::ios::binary);
230
- auto sketch = compact_theta_sketch::deserialize(is);
231
- REQUIRE_FALSE(sketch.is_empty());
232
- REQUIRE_FALSE(sketch.is_estimation_mode());
233
- REQUIRE(sketch.is_ordered());
234
- REQUIRE(sketch.get_num_retained() == 100);
235
-
236
- // the same construction process in Java must have produced exactly the same sketch
237
- auto update_sketch = update_theta_sketch::builder().build();
238
- const int n = 100;
239
- for (int i = 0; i < n; i++) update_sketch.update(i);
240
- REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
241
- REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
242
- REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
243
- REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
244
- REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
245
- REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
246
- REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
247
- REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
248
- REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
249
- compact_theta_sketch compact_sketch = update_sketch.compact();
250
- // the sketches are ordered, so the iteration sequence must match exactly
251
- auto iter = sketch.begin();
252
- for (const auto& key: compact_sketch) {
253
- REQUIRE(*iter == key);
254
- ++iter;
255
- }
256
- }
257
-
258
- TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") {
259
- std::ifstream is;
260
- is.exceptions(std::ios::failbit | std::ios::badbit);
261
- is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary);
262
- auto sketch = compact_theta_sketch::deserialize(is);
263
- REQUIRE_FALSE(sketch.is_empty());
264
- REQUIRE(sketch.is_estimation_mode());
265
- REQUIRE(sketch.is_ordered());
266
- REQUIRE(sketch.get_num_retained() == 4342);
267
- REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
268
- REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
269
- REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
270
- REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
271
-
272
- // the same construction process in Java must have produced exactly the same sketch
273
- update_theta_sketch update_sketch = update_theta_sketch::builder().build();
274
- const int n = 8192;
275
- for (int i = 0; i < n; i++) update_sketch.update(i);
276
- REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
277
- REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
278
- REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
279
- REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
280
- REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
281
- REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
282
- REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
283
- REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
284
- REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
285
- compact_theta_sketch compact_sketch = update_sketch.compact();
286
- // the sketches are ordered, so the iteration sequence must match exactly
287
- auto iter = sketch.begin();
288
- for (const auto& key: compact_sketch) {
289
- REQUIRE(*iter == key);
290
- ++iter;
291
- }
292
- }
293
-
294
198
  TEST_CASE("theta sketch: deserialize compact v1 estimation from java", "[theta_sketch]") {
295
199
  std::ifstream is;
296
200
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -473,30 +377,6 @@ TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sk
473
377
  REQUIRE_THROWS_AS(wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size(), 0), std::invalid_argument);
474
378
  }
475
379
 
476
- TEST_CASE("theta sketch: wrap compact empty from java", "[theta_sketch]") {
477
- std::ifstream is;
478
- is.exceptions(std::ios::failbit | std::ios::badbit);
479
- is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary | std::ios::ate);
480
-
481
- std::vector<uint8_t> buf;
482
- if(is) {
483
- auto size = is.tellg();
484
- buf.reserve(size);
485
- buf.assign(size, 0);
486
- is.seekg(0, std::ios_base::beg);
487
- is.read((char*)(buf.data()), buf.size());
488
- }
489
-
490
- auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
491
- REQUIRE(sketch.is_empty());
492
- REQUIRE_FALSE(sketch.is_estimation_mode());
493
- REQUIRE(sketch.get_num_retained() == 0);
494
- REQUIRE(sketch.get_theta() == 1.0);
495
- REQUIRE(sketch.get_estimate() == 0.0);
496
- REQUIRE(sketch.get_lower_bound(1) == 0.0);
497
- REQUIRE(sketch.get_upper_bound(1) == 0.0);
498
- }
499
-
500
380
  TEST_CASE("theta sketch: wrap compact v1 empty from java", "[theta_sketch]") {
501
381
  std::ifstream is;
502
382
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -545,33 +425,10 @@ TEST_CASE("theta sketch: wrap compact v2 empty from java", "[theta_sketch]") {
545
425
  REQUIRE(sketch.get_upper_bound(1) == 0.0);
546
426
  }
547
427
 
548
- TEST_CASE("theta sketch: wrap single item from java", "[theta_sketch]") {
549
- std::ifstream is;
550
- is.exceptions(std::ios::failbit | std::ios::badbit);
551
- is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary | std::ios::ate);
552
- std::vector<uint8_t> buf;
553
- if(is) {
554
- auto size = is.tellg();
555
- buf.reserve(size);
556
- buf.assign(size, 0);
557
- is.seekg(0, std::ios_base::beg);
558
- is.read((char*)(buf.data()), buf.size());
559
- }
560
-
561
- auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
562
- REQUIRE_FALSE(sketch.is_empty());
563
- REQUIRE_FALSE(sketch.is_estimation_mode());
564
- REQUIRE(sketch.get_num_retained() == 1);
565
- REQUIRE(sketch.get_theta() == 1.0);
566
- REQUIRE(sketch.get_estimate() == 1.0);
567
- REQUIRE(sketch.get_lower_bound(1) == 1.0);
568
- REQUIRE(sketch.get_upper_bound(1) == 1.0);
569
- }
570
-
571
- TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
428
+ TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]") {
572
429
  std::ifstream is;
573
430
  is.exceptions(std::ios::failbit | std::ios::badbit);
574
- is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary | std::ios::ate);
431
+ is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary | std::ios::ate);
575
432
  std::vector<uint8_t> buf;
576
433
  if(is) {
577
434
  auto size = is.tellg();
@@ -584,7 +441,7 @@ TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
584
441
  auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
585
442
  REQUIRE_FALSE(sketch.is_empty());
586
443
  REQUIRE(sketch.is_estimation_mode());
587
- REQUIRE(sketch.is_ordered());
444
+ // REQUIRE(sketch.is_ordered()); // v1 may not be ordered
588
445
  REQUIRE(sketch.get_num_retained() == 4342);
589
446
  REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
590
447
  REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
@@ -607,16 +464,16 @@ TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
607
464
  compact_theta_sketch compact_sketch = update_sketch.compact();
608
465
  // the sketches are ordered, so the iteration sequence must match exactly
609
466
  auto iter = sketch.begin();
610
- for (const auto& key: compact_sketch) {
467
+ for (const auto key: compact_sketch) {
611
468
  REQUIRE(*iter == key);
612
469
  ++iter;
613
470
  }
614
471
  }
615
472
 
616
- TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]") {
473
+ TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]") {
617
474
  std::ifstream is;
618
475
  is.exceptions(std::ios::failbit | std::ios::badbit);
619
- is.open(inputPath + "theta_compact_estimation_from_java_v1.sk", std::ios::binary | std::ios::ate);
476
+ is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary | std::ios::ate);
620
477
  std::vector<uint8_t> buf;
621
478
  if(is) {
622
479
  auto size = is.tellg();
@@ -652,52 +509,46 @@ TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]"
652
509
  compact_theta_sketch compact_sketch = update_sketch.compact();
653
510
  // the sketches are ordered, so the iteration sequence must match exactly
654
511
  auto iter = sketch.begin();
655
- for (const auto& key: compact_sketch) {
512
+ for (const auto key: compact_sketch) {
656
513
  REQUIRE(*iter == key);
657
514
  ++iter;
658
515
  }
659
516
  }
660
517
 
661
- TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]") {
662
- std::ifstream is;
663
- is.exceptions(std::ios::failbit | std::ios::badbit);
664
- is.open(inputPath + "theta_compact_estimation_from_java_v2.sk", std::ios::binary | std::ios::ate);
665
- std::vector<uint8_t> buf;
666
- if(is) {
667
- auto size = is.tellg();
668
- buf.reserve(size);
669
- buf.assign(size, 0);
670
- is.seekg(0, std::ios_base::beg);
671
- is.read((char*)(buf.data()), buf.size());
518
+ TEST_CASE("theta sketch: serialize deserialize compressed", "[theta_sketch]") {
519
+ auto update_sketch = update_theta_sketch::builder().build();
520
+ for (int i = 0; i < 10000; i++) update_sketch.update(i);
521
+ auto compact_sketch = update_sketch.compact();
522
+
523
+ auto bytes = compact_sketch.serialize_compressed();
524
+ { // deserialize bytes
525
+ auto deserialized_sketch = compact_theta_sketch::deserialize(bytes.data(), bytes.size());
526
+ REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
527
+ REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
528
+ auto iter = deserialized_sketch.begin();
529
+ for (const auto key: compact_sketch) {
530
+ REQUIRE(*iter == key);
531
+ ++iter;
532
+ }
533
+ }
534
+ { // wrap bytes
535
+ auto wrapped_sketch = wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size());
536
+ REQUIRE(wrapped_sketch.get_num_retained() == compact_sketch.get_num_retained());
537
+ REQUIRE(wrapped_sketch.get_theta() == compact_sketch.get_theta());
538
+ auto iter = wrapped_sketch.begin();
539
+ for (const auto key: compact_sketch) {
540
+ REQUIRE(*iter == key);
541
+ ++iter;
542
+ }
672
543
  }
673
544
 
674
- auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
675
- REQUIRE_FALSE(sketch.is_empty());
676
- REQUIRE(sketch.is_estimation_mode());
677
- // REQUIRE(sketch.is_ordered()); // v1 may not be ordered
678
- REQUIRE(sketch.get_num_retained() == 4342);
679
- REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
680
- REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
681
- REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
682
- REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
683
-
684
- // the same construction process in Java must have produced exactly the same sketch
685
- update_theta_sketch update_sketch = update_theta_sketch::builder().build();
686
- const int n = 8192;
687
- for (int i = 0; i < n; i++) update_sketch.update(i);
688
- REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
689
- REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
690
- REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
691
- REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
692
- REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
693
- REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
694
- REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
695
- REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
696
- REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
697
- compact_theta_sketch compact_sketch = update_sketch.compact();
698
- // the sketches are ordered, so the iteration sequence must match exactly
699
- auto iter = sketch.begin();
700
- for (const auto& key: compact_sketch) {
545
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
546
+ compact_sketch.serialize_compressed(s);
547
+ auto deserialized_sketch = compact_theta_sketch::deserialize(s);
548
+ REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
549
+ REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
550
+ auto iter = deserialized_sketch.begin();
551
+ for (const auto key: compact_sketch) {
701
552
  REQUIRE(*iter == key);
702
553
  ++iter;
703
554
  }
@@ -128,4 +128,29 @@ TEST_CASE("theta union: seed mismatch", "[theta_union]") {
128
128
  REQUIRE_THROWS_AS(u.update(sketch), std::invalid_argument);
129
129
  }
130
130
 
131
+ TEST_CASE("theta union: larger K", "[theta_union]") {
132
+ auto update_sketch1 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
133
+ for(int i = 0; i < 16384; ++i) update_sketch1.update(i);
134
+
135
+ auto update_sketch2 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
136
+ for(int i = 0; i < 26384; ++i) update_sketch2.update(i);
137
+
138
+ auto update_sketch3 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
139
+ for(int i = 0; i < 86384; ++i) update_sketch3.update(i);
140
+
141
+ auto union1 = datasketches::theta_union::builder().set_lg_k(16).build();
142
+ union1.update(update_sketch2);
143
+ union1.update(update_sketch1);
144
+ union1.update(update_sketch3);
145
+ auto result1 = union1.get_result();
146
+ REQUIRE(result1.get_estimate() == update_sketch3.get_estimate());
147
+
148
+ auto union2 = datasketches::theta_union::builder().set_lg_k(16).build();
149
+ union2.update(update_sketch1);
150
+ union2.update(update_sketch3);
151
+ union2.update(update_sketch2);
152
+ auto result2 = union2.get_result();
153
+ REQUIRE(result2.get_estimate() == update_sketch3.get_estimate());
154
+ }
155
+
131
156
  } /* namespace datasketches */
@@ -47,11 +47,12 @@ install(FILES
47
47
  include/tuple_a_not_b_impl.hpp
48
48
  include/tuple_jaccard_similarity.hpp
49
49
  include/array_of_doubles_sketch.hpp
50
- include/array_of_doubles_sketch_impl.hpp
51
- include/array_of_doubles_union.hpp
52
- include/array_of_doubles_union_impl.hpp
53
- include/array_of_doubles_intersection.hpp
54
- include/array_of_doubles_intersection_impl.hpp
55
- include/array_of_doubles_a_not_b.hpp
56
- include/array_of_doubles_a_not_b_impl.hpp
50
+ include/array_tuple_sketch.hpp
51
+ include/array_tuple_sketch_impl.hpp
52
+ include/array_tuple_union.hpp
53
+ include/array_tuple_union_impl.hpp
54
+ include/array_tuple_intersection.hpp
55
+ include/array_tuple_intersection_impl.hpp
56
+ include/array_tuple_a_not_b.hpp
57
+ include/array_tuple_a_not_b_impl.hpp
57
58
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
@@ -20,160 +20,35 @@
20
20
  #ifndef ARRAY_OF_DOUBLES_SKETCH_HPP_
21
21
  #define ARRAY_OF_DOUBLES_SKETCH_HPP_
22
22
 
23
- #include <vector>
24
- #include <memory>
25
-
26
- #include "serde.hpp"
27
- #include "tuple_sketch.hpp"
23
+ #include "array_tuple_sketch.hpp"
24
+ #include "array_tuple_union.hpp"
25
+ #include "array_tuple_intersection.hpp"
26
+ #include "array_tuple_a_not_b.hpp"
28
27
 
29
28
  namespace datasketches {
30
29
 
31
- // This sketch is equivalent of ArrayOfDoublesSketch in Java
32
-
33
- // This simple array of double is faster than std::vector and should be sufficient for this application
34
- template<typename Allocator = std::allocator<double>>
35
- class aod {
36
- public:
37
- explicit aod(uint8_t size, const Allocator& allocator = Allocator()):
38
- allocator_(allocator), size_(size), array_(allocator_.allocate(size_)) {
39
- std::fill(array_, array_ + size_, 0);
40
- }
41
- aod(const aod& other):
42
- allocator_(other.allocator_),
43
- size_(other.size_),
44
- array_(allocator_.allocate(size_))
45
- {
46
- std::copy(other.array_, other.array_ + size_, array_);
47
- }
48
- aod(aod&& other) noexcept:
49
- allocator_(std::move(other.allocator_)),
50
- size_(other.size_),
51
- array_(other.array_)
52
- {
53
- other.array_ = nullptr;
54
- }
55
- ~aod() {
56
- if (array_ != nullptr) allocator_.deallocate(array_, size_);
57
- }
58
- aod& operator=(const aod& other) {
59
- aod copy(other);
60
- std::swap(allocator_, copy.allocator_);
61
- std::swap(size_, copy.size_);
62
- std::swap(array_, copy.array_);
63
- return *this;
64
- }
65
- aod& operator=(aod&& other) {
66
- std::swap(allocator_, other.allocator_);
67
- std::swap(size_, other.size_);
68
- std::swap(array_, other.array_);
69
- return *this;
70
- }
71
- double& operator[](size_t index) { return array_[index]; }
72
- double operator[](size_t index) const { return array_[index]; }
73
- uint8_t size() const { return size_; }
74
- double* data() { return array_; }
75
- const double* data() const { return array_; }
76
- bool operator==(const aod& other) const {
77
- for (uint8_t i = 0; i < size_; ++i) if (array_[i] != other.array_[i]) return false;
78
- return true;
79
- }
80
- private:
81
- Allocator allocator_;
82
- uint8_t size_;
83
- double* array_;
84
- };
85
-
86
- template<typename A = std::allocator<double>>
87
- class array_of_doubles_update_policy {
88
- public:
89
- array_of_doubles_update_policy(uint8_t num_values = 1, const A& allocator = A()):
90
- allocator_(allocator), num_values_(num_values) {}
91
- aod<A> create() const {
92
- return aod<A>(num_values_, allocator_);
93
- }
94
- template<typename InputVector> // to allow any type with indexed access (such as double*)
95
- void update(aod<A>& summary, const InputVector& update) const {
96
- for (uint8_t i = 0; i < num_values_; ++i) summary[i] += update[i];
97
- }
98
- uint8_t get_num_values() const {
99
- return num_values_;
100
- }
101
-
102
- private:
103
- A allocator_;
104
- uint8_t num_values_;
105
- };
106
-
107
- // forward declaration
108
- template<typename A> class compact_array_of_doubles_sketch_alloc;
109
-
110
- template<typename A> using AllocAOD = typename std::allocator_traits<A>::template rebind_alloc<aod<A>>;
111
-
112
- template<typename A = std::allocator<double>>
113
- class update_array_of_doubles_sketch_alloc: public update_tuple_sketch<aod<A>, aod<A>, array_of_doubles_update_policy<A>, AllocAOD<A>> {
114
- public:
115
- using Base = update_tuple_sketch<aod<A>, aod<A>, array_of_doubles_update_policy<A>, AllocAOD<A>>;
116
- using resize_factor = typename Base::resize_factor;
117
-
118
- class builder;
30
+ /// convenience alias with default allocator, default policy for update_array_of_doubles_sketch
31
+ using default_array_of_doubles_update_policy = default_array_tuple_update_policy<array<double>>;
119
32
 
120
- compact_array_of_doubles_sketch_alloc<A> compact(bool ordered = true) const;
121
- uint8_t get_num_values() const;
33
+ /// convenience alias with default allocator, equivalent to ArrayOfDoublesUpdatableSketch in Java
34
+ using update_array_of_doubles_sketch = update_array_tuple_sketch<array<double>>;
122
35
 
123
- private:
124
- // for builder
125
- update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta,
126
- uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator);
127
- };
36
+ /// convenience alias with default allocator, equivalent to ArrayOfDoublesCompactSketch in Java
37
+ using compact_array_of_doubles_sketch = compact_array_tuple_sketch<array<double>>;
128
38
 
129
- // alias with the default allocator for convenience
130
- using update_array_of_doubles_sketch = update_array_of_doubles_sketch_alloc<>;
39
+ /// convenience alias, default policy for array_of_doubles_union
40
+ using default_array_of_doubles_union_policy = default_array_tuple_union_policy<array<double>>;
131
41
 
132
- template<typename A>
133
- class update_array_of_doubles_sketch_alloc<A>::builder: public tuple_base_builder<builder, array_of_doubles_update_policy<A>, A> {
134
- public:
135
- builder(const array_of_doubles_update_policy<A>& policy = array_of_doubles_update_policy<A>(), const A& allocator = A());
136
- update_array_of_doubles_sketch_alloc<A> build() const;
137
- };
42
+ /// convenience alias with default allocator, equivalent to ArrayOfDoublesUnion in Java
43
+ using array_of_doubles_union = array_tuple_union<array<double>>;
138
44
 
139
- template<typename A = std::allocator<double>>
140
- class compact_array_of_doubles_sketch_alloc: public compact_tuple_sketch<aod<A>, AllocAOD<A>> {
141
- public:
142
- using Base = compact_tuple_sketch<aod<A>, AllocAOD<A>>;
143
- using Entry = typename Base::Entry;
144
- using AllocEntry = typename Base::AllocEntry;
145
- using AllocU64 = typename Base::AllocU64;
146
- using vector_bytes = typename Base::vector_bytes;
45
+ /// convenience alias with default allocator, equivalent to ArrayOfDoublesIntersection in Java
46
+ /// no default policy since it is not clear in general
47
+ template<typename Policy> using array_of_doubles_intersection = array_tuple_intersection<array<double>, Policy>;
147
48
 
148
- static const uint8_t SERIAL_VERSION = 1;
149
- static const uint8_t SKETCH_FAMILY = 9;
150
- static const uint8_t SKETCH_TYPE = 3;
151
- enum flags { UNUSED1, UNUSED2, IS_EMPTY, HAS_ENTRIES, IS_ORDERED };
152
-
153
- template<typename Sketch>
154
- compact_array_of_doubles_sketch_alloc(const Sketch& other, bool ordered = true);
155
-
156
- uint8_t get_num_values() const;
157
-
158
- void serialize(std::ostream& os) const;
159
- vector_bytes serialize(unsigned header_size_bytes = 0) const;
160
-
161
- static compact_array_of_doubles_sketch_alloc deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
162
- static compact_array_of_doubles_sketch_alloc deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED,
163
- const A& allocator = A());
164
-
165
- // for internal use
166
- compact_array_of_doubles_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries, uint8_t num_values);
167
- compact_array_of_doubles_sketch_alloc(uint8_t num_values, Base&& base);
168
- private:
169
- uint8_t num_values_;
170
- };
171
-
172
- // alias with the default allocator for convenience
173
- using compact_array_of_doubles_sketch = compact_array_of_doubles_sketch_alloc<>;
49
+ /// convenience alias with default allocator, equivalent to ArrayOfDoublesAnotB in Java
50
+ using array_of_doubles_a_not_b = array_tuple_a_not_b<array<double>>;
174
51
 
175
52
  } /* namespace datasketches */
176
53
 
177
- #include "array_of_doubles_sketch_impl.hpp"
178
-
179
54
  #endif
@@ -17,36 +17,44 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
- #ifndef ARRAY_OF_DOUBLES_A_NOT_B_HPP_
21
- #define ARRAY_OF_DOUBLES_A_NOT_B_HPP_
20
+ #ifndef ARRAY_TUPLE_A_NOT_B_HPP_
21
+ #define ARRAY_TUPLE_A_NOT_B_HPP_
22
22
 
23
23
  #include <vector>
24
24
  #include <memory>
25
25
 
26
- #include "array_of_doubles_sketch.hpp"
26
+ #include "array_tuple_sketch.hpp"
27
27
  #include "tuple_a_not_b.hpp"
28
28
 
29
29
  namespace datasketches {
30
30
 
31
- template<typename Allocator = std::allocator<double>>
32
- class array_of_doubles_a_not_b_alloc: tuple_a_not_b<aod<Allocator>, AllocAOD<Allocator>> {
31
+ /// array tuple A-not-B
32
+ template<typename Array, typename Allocator = typename Array::allocator_type>
33
+ class array_tuple_a_not_b: tuple_a_not_b<Array, Allocator> {
33
34
  public:
34
- using Summary = aod<Allocator>;
35
- using AllocSummary = AllocAOD<Allocator>;
36
- using Base = tuple_a_not_b<Summary, AllocSummary>;
37
- using CompactSketch = compact_array_of_doubles_sketch_alloc<Allocator>;
38
-
39
- explicit array_of_doubles_a_not_b_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
40
-
35
+ using Base = tuple_a_not_b<Array, Allocator>;
36
+ using CompactSketch = compact_array_tuple_sketch<Array, Allocator>;
37
+
38
+ /**
39
+ * Constructor
40
+ * @param seed for the hash function that was used to create the sketch
41
+ * @param allocator to use for allocating and deallocating memory
42
+ */
43
+ explicit array_tuple_a_not_b(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
44
+
45
+ /**
46
+ * Computes the A-not-B set operation given two sketches.
47
+ * @param a sketch A
48
+ * @param b sketch B
49
+ * @param ordered optional flag to specify if an ordered sketch should be produced
50
+ * @return the result of A-not-B as a compact sketch
51
+ */
41
52
  template<typename FwdSketch, typename Sketch>
42
53
  CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const;
43
54
  };
44
55
 
45
- // alias with the default allocator for convenience
46
- using array_of_doubles_a_not_b = array_of_doubles_a_not_b_alloc<>;
47
-
48
56
  } /* namespace datasketches */
49
57
 
50
- #include "array_of_doubles_a_not_b_impl.hpp"
58
+ #include "array_tuple_a_not_b_impl.hpp"
51
59
 
52
60
  #endif
@@ -19,13 +19,13 @@
19
19
 
20
20
  namespace datasketches {
21
21
 
22
- template<typename A>
23
- array_of_doubles_a_not_b_alloc<A>::array_of_doubles_a_not_b_alloc(uint64_t seed, const A& allocator):
22
+ template<typename Array, typename Allocator>
23
+ array_tuple_a_not_b<Array, Allocator>::array_tuple_a_not_b(uint64_t seed, const Allocator& allocator):
24
24
  Base(seed, allocator) {}
25
25
 
26
- template<typename A>
26
+ template<typename Array, typename Allocator>
27
27
  template<typename FwdSketch, typename Sketch>
28
- auto array_of_doubles_a_not_b_alloc<A>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch {
28
+ auto array_tuple_a_not_b<Array, Allocator>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch {
29
29
  return CompactSketch(a.get_num_values(), Base::compute(std::forward<FwdSketch>(a), b, ordered));
30
30
  }
31
31
 
@@ -0,0 +1,65 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef ARRAY_TUPLE_INTERSECTION_HPP_
21
+ #define ARRAY_TUPLE_INTERSECTION_HPP_
22
+
23
+ #include <vector>
24
+ #include <memory>
25
+
26
+ #include "array_tuple_sketch.hpp"
27
+ #include "tuple_intersection.hpp"
28
+
29
+ namespace datasketches {
30
+
31
+ /// array tuple intersection
32
+ template<
33
+ typename Array,
34
+ typename Policy,
35
+ typename Allocator = typename Array::allocator_type
36
+ >
37
+ class array_tuple_intersection: public tuple_intersection<Array, Policy, Allocator> {
38
+ public:
39
+ using Base = tuple_intersection<Array, Policy, Allocator>;
40
+ using CompactSketch = compact_array_tuple_sketch<Array, Allocator>;
41
+ using resize_factor = theta_constants::resize_factor;
42
+
43
+ /**
44
+ * Constructor
45
+ * @param seed for the hash function that was used to create the sketch
46
+ * @param policy user-defined way of combining Summary during intersection
47
+ * @param allocator to use for allocating and deallocating memory
48
+ */
49
+ explicit array_tuple_intersection(uint64_t seed = DEFAULT_SEED, const Policy& policy = Policy(), const Allocator& allocator = Allocator());
50
+
51
+ /**
52
+ * Produces a copy of the current state of the intersection.
53
+ * If update() was not called, the state is the infinite "universe",
54
+ * which is considered an undefined state, and throws an exception.
55
+ * @param ordered optional flag to specify if an ordered sketch should be produced
56
+ * @return the result of the intersection as a compact sketch
57
+ */
58
+ CompactSketch get_result(bool ordered = true) const;
59
+ };
60
+
61
+ } /* namespace datasketches */
62
+
63
+ #include "array_tuple_intersection_impl.hpp"
64
+
65
+ #endif