datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -19,56 +19,55 @@
19
19
 
20
20
  namespace datasketches {
21
21
 
22
- template<typename A>
23
- update_array_of_doubles_sketch_alloc<A>::update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
24
- float p, uint64_t theta, uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator):
22
+ template<typename Array, typename Policy, typename Allocator>
23
+ update_array_tuple_sketch<Array, Policy, Allocator>::update_array_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
24
+ float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator):
25
25
  Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator) {}
26
26
 
27
-
28
- template<typename A>
29
- uint8_t update_array_of_doubles_sketch_alloc<A>::get_num_values() const {
27
+ template<typename Array, typename Policy, typename Allocator>
28
+ uint8_t update_array_tuple_sketch<Array, Policy, Allocator>::get_num_values() const {
30
29
  return this->policy_.get_num_values();
31
30
  }
32
31
 
33
- template<typename A>
34
- compact_array_of_doubles_sketch_alloc<A> update_array_of_doubles_sketch_alloc<A>::compact(bool ordered) const {
35
- return compact_array_of_doubles_sketch_alloc<A>(*this, ordered);
32
+ template<typename Array, typename Policy, typename Allocator>
33
+ compact_array_tuple_sketch<Array, Allocator> update_array_tuple_sketch<Array, Policy, Allocator>::compact(bool ordered) const {
34
+ return compact_array_tuple_sketch<Array, Allocator>(*this, ordered);
36
35
  }
37
36
 
38
37
  // builder
39
38
 
40
- template<typename A>
41
- update_array_of_doubles_sketch_alloc<A>::builder::builder(const array_of_doubles_update_policy<A>& policy, const A& allocator):
42
- tuple_base_builder<builder, array_of_doubles_update_policy<A>, A>(policy, allocator) {}
39
+ template<typename Array, typename Policy, typename Allocator>
40
+ update_array_tuple_sketch<Array, Policy, Allocator>::builder::builder(const Policy& policy, const Allocator& allocator):
41
+ tuple_base_builder<builder, Policy, Allocator>(policy, allocator) {}
43
42
 
44
- template<typename A>
45
- update_array_of_doubles_sketch_alloc<A> update_array_of_doubles_sketch_alloc<A>::builder::build() const {
46
- return update_array_of_doubles_sketch_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
43
+ template<typename Array, typename Policy, typename Allocator>
44
+ auto update_array_tuple_sketch<Array, Policy, Allocator>::builder::build() const -> update_array_tuple_sketch {
45
+ return update_array_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
47
46
  }
48
47
 
49
48
  // compact sketch
50
49
 
51
- template<typename A>
50
+ template<typename Array, typename Allocator>
52
51
  template<typename S>
53
- compact_array_of_doubles_sketch_alloc<A>::compact_array_of_doubles_sketch_alloc(const S& other, bool ordered):
52
+ compact_array_tuple_sketch<Array, Allocator>::compact_array_tuple_sketch(const S& other, bool ordered):
54
53
  Base(other, ordered), num_values_(other.get_num_values()) {}
55
54
 
56
- template<typename A>
57
- compact_array_of_doubles_sketch_alloc<A>::compact_array_of_doubles_sketch_alloc(bool is_empty, bool is_ordered,
55
+ template<typename Array, typename Allocator>
56
+ compact_array_tuple_sketch<Array, Allocator>::compact_array_tuple_sketch(bool is_empty, bool is_ordered,
58
57
  uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries, uint8_t num_values):
59
58
  Base(is_empty, is_ordered, seed_hash, theta, std::move(entries)), num_values_(num_values) {}
60
59
 
61
- template<typename A>
62
- compact_array_of_doubles_sketch_alloc<A>::compact_array_of_doubles_sketch_alloc(uint8_t num_values, Base&& base):
60
+ template<typename Array, typename Allocator>
61
+ compact_array_tuple_sketch<Array, Allocator>::compact_array_tuple_sketch(uint8_t num_values, Base&& base):
63
62
  Base(std::move(base)), num_values_(num_values) {}
64
63
 
65
- template<typename A>
66
- uint8_t compact_array_of_doubles_sketch_alloc<A>::get_num_values() const {
64
+ template<typename Array, typename Allocator>
65
+ uint8_t compact_array_tuple_sketch<Array, Allocator>::get_num_values() const {
67
66
  return num_values_;
68
67
  }
69
68
 
70
- template<typename A>
71
- void compact_array_of_doubles_sketch_alloc<A>::serialize(std::ostream& os) const {
69
+ template<typename Array, typename Allocator>
70
+ void compact_array_tuple_sketch<Array, Allocator>::serialize(std::ostream& os) const {
72
71
  const uint8_t preamble_longs = 1;
73
72
  write(os, preamble_longs);
74
73
  const uint8_t serial_version = SERIAL_VERSION;
@@ -96,17 +95,17 @@ void compact_array_of_doubles_sketch_alloc<A>::serialize(std::ostream& os) const
96
95
  write(os, it.first);
97
96
  }
98
97
  for (const auto& it: this->entries_) {
99
- write(os, it.second.data(), it.second.size() * sizeof(double));
98
+ write(os, it.second.data(), it.second.size() * sizeof(typename Array::value_type));
100
99
  }
101
100
  }
102
101
  }
103
102
 
104
- template<typename A>
105
- auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
103
+ template<typename Array, typename Allocator>
104
+ auto compact_array_tuple_sketch<Array, Allocator>::serialize(unsigned header_size_bytes) const -> vector_bytes {
106
105
  const uint8_t preamble_longs = 1;
107
106
  const size_t size = header_size_bytes + 16 // preamble and theta
108
107
  + (this->entries_.size() > 0 ? 8 : 0)
109
- + (sizeof(uint64_t) + sizeof(double) * num_values_) * this->entries_.size();
108
+ + (sizeof(uint64_t) + sizeof(typename Array::value_type) * num_values_) * this->entries_.size();
110
109
  vector_bytes bytes(size, 0, this->entries_.get_allocator());
111
110
  uint8_t* ptr = bytes.data() + header_size_bytes;
112
111
 
@@ -135,14 +134,14 @@ auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_by
135
134
  ptr += copy_to_mem(it.first, ptr);
136
135
  }
137
136
  for (const auto& it: this->entries_) {
138
- ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(double));
137
+ ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(typename Array::value_type));
139
138
  }
140
139
  }
141
140
  return bytes;
142
141
  }
143
142
 
144
- template<typename A>
145
- compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
143
+ template<typename Array, typename Allocator>
144
+ compact_array_tuple_sketch<Array, Allocator> compact_array_tuple_sketch<Array, Allocator>::deserialize(std::istream& is, uint64_t seed, const Allocator& allocator) {
146
145
  read<uint8_t>(is); // unused
147
146
  const auto serial_version = read<uint8_t>(is);
148
147
  const auto family = read<uint8_t>(is);
@@ -165,19 +164,19 @@ compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A
165
164
  std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
166
165
  read(is, keys.data(), num_entries * sizeof(uint64_t));
167
166
  for (size_t i = 0; i < num_entries; ++i) {
168
- aod<A> summary(num_values, allocator);
169
- read(is, summary.data(), num_values * sizeof(double));
167
+ Array summary(num_values, 0, allocator);
168
+ read(is, summary.data(), num_values * sizeof(typename Array::value_type));
170
169
  entries.push_back(Entry(keys[i], std::move(summary)));
171
170
  }
172
171
  }
173
172
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
174
173
  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
175
174
  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
176
- return compact_array_of_doubles_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
175
+ return compact_array_tuple_sketch<Array, Allocator>(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
177
176
  }
178
177
 
179
- template<typename A>
180
- compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
178
+ template<typename Array, typename Allocator>
179
+ compact_array_tuple_sketch<Array, Allocator> compact_array_tuple_sketch<Array, Allocator>::deserialize(const void* bytes, size_t size, uint64_t seed, const Allocator& allocator) {
181
180
  ensure_minimum_memory(size, 16);
182
181
  const char* ptr = static_cast<const char*>(bytes);
183
182
  ptr += sizeof(uint8_t); // unused
@@ -207,19 +206,19 @@ compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A
207
206
  uint32_t num_entries;
208
207
  ptr += copy_from_mem(ptr, num_entries);
209
208
  ptr += sizeof(uint32_t); // unused
210
- ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(double) * num_values) * num_entries);
209
+ ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(typename Array::value_type) * num_values) * num_entries);
211
210
  entries.reserve(num_entries);
212
211
  std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
213
212
  ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * num_entries);
214
213
  for (size_t i = 0; i < num_entries; ++i) {
215
- aod<A> summary(num_values, allocator);
216
- ptr += copy_from_mem(ptr, summary.data(), num_values * sizeof(double));
214
+ Array summary(num_values, 0, allocator);
215
+ ptr += copy_from_mem(ptr, summary.data(), num_values * sizeof(typename Array::value_type));
217
216
  entries.push_back(Entry(keys[i], std::move(summary)));
218
217
  }
219
218
  }
220
219
  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
221
220
  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
222
- return compact_array_of_doubles_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
221
+ return compact_array_tuple_sketch<Array, Allocator>(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
223
222
  }
224
223
 
225
224
  } /* namespace datasketches */
@@ -0,0 +1,81 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef ARRAY_TUPLE_UNION_HPP_
21
+ #define ARRAY_TUPLE_UNION_HPP_
22
+
23
+ #include <vector>
24
+ #include <memory>
25
+ #include "array_tuple_sketch.hpp"
26
+
27
+ #include "tuple_union.hpp"
28
+
29
+ namespace datasketches {
30
+
31
+ /// default array tuple union policy
32
+ template<typename Array>
33
+ struct default_array_tuple_union_policy {
34
+ default_array_tuple_union_policy(uint8_t num_values = 1): num_values_(num_values) {}
35
+
36
+ void operator()(Array& array, const Array& other) const {
37
+ for (uint8_t i = 0; i < num_values_; ++i) {
38
+ array[i] += other[i];
39
+ }
40
+ }
41
+ uint8_t get_num_values() const {
42
+ return num_values_;
43
+ }
44
+ private:
45
+ uint8_t num_values_;
46
+ };
47
+
48
+ /// array tuple union
49
+ template<
50
+ typename Array,
51
+ typename Policy = default_array_tuple_union_policy<Array>,
52
+ typename Allocator = typename Array::allocator_type
53
+ >
54
+ class array_tuple_union: public tuple_union<Array, Policy, Allocator> {
55
+ public:
56
+ using value_type = typename Array::value_type;
57
+ using Base = tuple_union<Array, Policy, Allocator>;
58
+ using CompactSketch = compact_array_tuple_sketch<Array, Allocator>;
59
+ using resize_factor = theta_constants::resize_factor;
60
+
61
+ class builder;
62
+
63
+ CompactSketch get_result(bool ordered = true) const;
64
+
65
+ private:
66
+ // for builder
67
+ array_tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
68
+ };
69
+
70
+ template<typename Array, typename Policy, typename Allocator>
71
+ class array_tuple_union<Array, Policy, Allocator>::builder: public tuple_base_builder<builder, Policy, Allocator> {
72
+ public:
73
+ builder(const Policy& policy = Policy(), const Allocator& allocator = Allocator());
74
+ array_tuple_union build() const;
75
+ };
76
+
77
+ } /* namespace datasketches */
78
+
79
+ #include "array_tuple_union_impl.hpp"
80
+
81
+ #endif
@@ -0,0 +1,43 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ namespace datasketches {
21
+
22
+ template<typename Array, typename Policy, typename Allocator>
23
+ array_tuple_union<Array, Policy, Allocator>::array_tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator):
24
+ Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator)
25
+ {}
26
+
27
+ template<typename Array, typename Policy, typename Allocator>
28
+ auto array_tuple_union<Array, Policy, Allocator>::get_result(bool ordered) const -> CompactSketch {
29
+ return CompactSketch(this->state_.get_policy().get_external_policy().get_num_values(), Base::get_result(ordered));
30
+ }
31
+
32
+ // builder
33
+
34
+ template<typename Array, typename Policy, typename Allocator>
35
+ array_tuple_union<Array, Policy, Allocator>::builder::builder(const Policy& policy, const Allocator& allocator):
36
+ tuple_base_builder<builder, Policy, Allocator>(policy, allocator) {} // initializer must name the declared base, which is parameterized on Allocator (not Array::allocator_type)
37
+
38
+ template<typename Array, typename Policy, typename Allocator>
39
+ auto array_tuple_union<Array, Policy, Allocator>::builder::build() const -> array_tuple_union {
40
+ return array_tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
41
+ }
42
+
43
+ } /* namespace datasketches */
@@ -25,6 +25,7 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
+ /// tuple A-not-B
28
29
  template<
29
30
  typename Summary,
30
31
  typename Allocator = std::allocator<Summary>
@@ -37,11 +38,19 @@ public:
37
38
  using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
38
39
  using State = theta_set_difference_base<Entry, ExtractKey, CompactSketch, AllocEntry>;
39
40
 
41
+ /**
42
+ * Constructor
43
+ * @param seed for the hash function that was used to create the sketch
44
+ * @param allocator to use for allocating and deallocating memory
45
+ */
40
46
  explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
41
47
 
42
48
  /**
43
- * Computes the a-not-b set operation given two sketches.
44
- * @return the result of a-not-b
49
+ * Computes the A-not-B set operation given two sketches.
50
+ * @param a sketch A
51
+ * @param b sketch B
52
+ * @param ordered optional flag to specify if an ordered sketch should be produced
53
+ * @return the result of A-not-B as a compact sketch
45
54
  */
46
55
  template<typename FwdSketch, typename Sketch>
47
56
  CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const;
@@ -28,16 +28,17 @@ namespace datasketches {
28
28
  /*
29
29
  // for types with defined + operation
30
30
  template<typename Summary>
31
- struct example_intersection_policy {
31
+ struct example_tuple_intersection_policy {
32
32
  void operator()(Summary& summary, const Summary& other) const {
33
33
  summary += other;
34
34
  }
35
- void operator()(Summary& summary, Summary&& other) const {
36
- summary += other;
37
- }
38
35
  };
39
36
  */
40
37
 
38
+ /**
39
+ * Tuple intersection.
40
+ * Computes intersection of Tuple sketches.
41
+ */
41
42
  template<
42
43
  typename Summary,
43
44
  typename Policy,
@@ -54,19 +55,25 @@ public:
54
55
  // reformulate the external policy that operates on Summary
55
56
  // in terms of operations on Entry
56
57
  struct internal_policy {
57
- internal_policy(const Policy& policy): policy_(policy) {}
58
+ internal_policy(const Policy& external_policy): external_policy_(external_policy) {}
58
59
  void operator()(Entry& internal_entry, const Entry& incoming_entry) const {
59
- policy_(internal_entry.second, incoming_entry.second);
60
+ external_policy_(internal_entry.second, incoming_entry.second);
60
61
  }
61
62
  void operator()(Entry& internal_entry, Entry&& incoming_entry) const {
62
- policy_(internal_entry.second, std::move(incoming_entry.second));
63
+ external_policy_(internal_entry.second, std::move(incoming_entry.second));
63
64
  }
64
- const Policy& get_policy() const { return policy_; }
65
- Policy policy_;
65
+ const Policy& get_external_policy() const { return external_policy_; }
66
+ Policy external_policy_;
66
67
  };
67
68
 
68
69
  using State = theta_intersection_base<Entry, ExtractKey, internal_policy, Sketch, CompactSketch, AllocEntry>;
69
70
 
71
+ /**
72
+ * Constructor
73
+ * @param seed for the hash function that was used to create the sketch
74
+ * @param policy user-defined way of combining Summary during intersection
75
+ * @param allocator to use for allocating and deallocating memory
76
+ */
70
77
  explicit tuple_intersection(uint64_t seed = DEFAULT_SEED, const Policy& policy = Policy(), const Allocator& allocator = Allocator());
71
78
 
72
79
  /**
@@ -82,7 +89,7 @@ public:
82
89
  * Produces a copy of the current state of the intersection.
83
90
  * If update() was not called, the state is the infinite "universe",
84
91
  * which is considered an undefined state, and throws an exception.
85
- * @param ordered optional flag to specify if ordered sketch should be produced
92
+ * @param ordered optional flag to specify if an ordered sketch should be produced
86
93
  * @return the result of the intersection
87
94
  */
88
95
  CompactSketch get_result(bool ordered = true) const;
@@ -26,10 +26,11 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
+ /// Tuple Jaccard similarity alias
29
30
  template<
30
31
  typename Summary,
31
32
  typename IntersectionPolicy,
32
- typename UnionPolicy = default_union_policy<Summary>,
33
+ typename UnionPolicy = default_tuple_union_policy<Summary>,
33
34
  typename Allocator = std::allocator<Summary>>
34
35
  using tuple_jaccard_similarity = jaccard_similarity_base<tuple_union<Summary, UnionPolicy, Allocator>, tuple_intersection<Summary, IntersectionPolicy, Allocator>, pair_extract_key<uint64_t, Summary>>;
35
36