datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -27,7 +27,7 @@
27
27
 
28
28
  namespace datasketches {
29
29
 
30
- // forward-declarations
30
+ // forward declarations
31
31
  template<typename S, typename A> class tuple_sketch;
32
32
  template<typename S, typename U, typename P, typename A> class update_tuple_sketch;
33
33
  template<typename S, typename A> class compact_tuple_sketch;
@@ -43,6 +43,10 @@ struct pair_extract_key {
43
43
  }
44
44
  };
45
45
 
46
+ /**
47
+ * Base class for Tuple sketch.
48
+ * This is an extension of Theta sketch that allows keeping arbitrary Summary associated with each retained key.
49
+ */
46
50
  template<
47
51
  typename Summary,
48
52
  typename Allocator = std::allocator<Summary>
@@ -190,7 +194,7 @@ protected:
190
194
 
191
195
  // for types with defined default constructor and + operation
192
196
  template<typename Summary, typename Update>
193
- struct default_update_policy {
197
+ struct default_tuple_update_policy {
194
198
  Summary create() const {
195
199
  return Summary();
196
200
  }
@@ -199,10 +203,15 @@ struct default_update_policy {
199
203
  }
200
204
  };
201
205
 
206
+ /**
207
+ * Update Tuple sketch.
208
+ * The purpose of this class is to build a Tuple sketch from input data via the update() methods.
209
+ * There is no constructor. Use builder instead.
210
+ */
202
211
  template<
203
212
  typename Summary,
204
213
  typename Update = Summary,
205
- typename Policy = default_update_policy<Summary, Update>,
214
+ typename Policy = default_tuple_update_policy<Summary, Update>,
206
215
  typename Allocator = std::allocator<Summary>
207
216
  >
208
217
  class update_tuple_sketch: public tuple_sketch<Summary, Allocator> {
@@ -244,21 +253,24 @@ public:
244
253
 
245
254
  /**
246
255
  * Update this sketch with a given string.
247
- * @param value string to update the sketch with
256
+ * @param key string to update the sketch with
257
+ * @param value to update the sketch with
248
258
  */
249
259
  template<typename FwdUpdate>
250
260
  inline void update(const std::string& key, FwdUpdate&& value);
251
261
 
252
262
  /**
253
263
  * Update this sketch with a given unsigned 64-bit integer.
254
- * @param value uint64_t to update the sketch with
264
+ * @param key uint64_t to update the sketch with
265
+ * @param value to update the sketch with
255
266
  */
256
267
  template<typename FwdUpdate>
257
268
  inline void update(uint64_t key, FwdUpdate&& value);
258
269
 
259
270
  /**
260
271
  * Update this sketch with a given signed 64-bit integer.
261
- * @param value int64_t to update the sketch with
272
+ * @param key int64_t to update the sketch with
273
+ * @param value to update the sketch with
262
274
  */
263
275
  template<typename FwdUpdate>
264
276
  inline void update(int64_t key, FwdUpdate&& value);
@@ -266,7 +278,8 @@ public:
266
278
  /**
267
279
  * Update this sketch with a given unsigned 32-bit integer.
268
280
  * For compatibility with Java implementation.
269
- * @param value uint32_t to update the sketch with
281
+ * @param key uint32_t to update the sketch with
282
+ * @param value to update the sketch with
270
283
  */
271
284
  template<typename FwdUpdate>
272
285
  inline void update(uint32_t key, FwdUpdate&& value);
@@ -274,7 +287,8 @@ public:
274
287
  /**
275
288
  * Update this sketch with a given signed 32-bit integer.
276
289
  * For compatibility with Java implementation.
277
- * @param value int32_t to update the sketch with
290
+ * @param key int32_t to update the sketch with
291
+ * @param value to update the sketch with
278
292
  */
279
293
  template<typename FwdUpdate>
280
294
  inline void update(int32_t key, FwdUpdate&& value);
@@ -282,7 +296,8 @@ public:
282
296
  /**
283
297
  * Update this sketch with a given unsigned 16-bit integer.
284
298
  * For compatibility with Java implementation.
285
- * @param value uint16_t to update the sketch with
299
+ * @param key uint16_t to update the sketch with
300
+ * @param value to update the sketch with
286
301
  */
287
302
  template<typename FwdUpdate>
288
303
  inline void update(uint16_t key, FwdUpdate&& value);
@@ -290,7 +305,8 @@ public:
290
305
  /**
291
306
  * Update this sketch with a given signed 16-bit integer.
292
307
  * For compatibility with Java implementation.
293
- * @param value int16_t to update the sketch with
308
+ * @param key int16_t to update the sketch with
309
+ * @param value to update the sketch with
294
310
  */
295
311
  template<typename FwdUpdate>
296
312
  inline void update(int16_t key, FwdUpdate&& value);
@@ -298,7 +314,8 @@ public:
298
314
  /**
299
315
  * Update this sketch with a given unsigned 8-bit integer.
300
316
  * For compatibility with Java implementation.
301
- * @param value uint8_t to update the sketch with
317
+ * @param key uint8_t to update the sketch with
318
+ * @param value to update the sketch with
302
319
  */
303
320
  template<typename FwdUpdate>
304
321
  inline void update(uint8_t key, FwdUpdate&& value);
@@ -306,7 +323,8 @@ public:
306
323
  /**
307
324
  * Update this sketch with a given signed 8-bit integer.
308
325
  * For compatibility with Java implementation.
309
- * @param value int8_t to update the sketch with
326
+ * @param key int8_t to update the sketch with
327
+ * @param value to update the sketch with
310
328
  */
311
329
  template<typename FwdUpdate>
312
330
  inline void update(int8_t key, FwdUpdate&& value);
@@ -314,7 +332,8 @@ public:
314
332
  /**
315
333
  * Update this sketch with a given double-precision floating point value.
316
334
  * For compatibility with Java implementation.
317
- * @param value double to update the sketch with
335
+ * @param key double to update the sketch with
336
+ * @param value to update the sketch with
318
337
  */
319
338
  template<typename FwdUpdate>
320
339
  inline void update(double key, FwdUpdate&& value);
@@ -322,7 +341,8 @@ public:
322
341
  /**
323
342
  * Update this sketch with a given floating point value.
324
343
  * For compatibility with Java implementation.
325
- * @param value float to update the sketch with
344
+ * @param key float to update the sketch with
345
+ * @param value to update the sketch with
326
346
  */
327
347
  template<typename FwdUpdate>
328
348
  inline void update(float key, FwdUpdate&& value);
@@ -337,8 +357,9 @@ public:
337
357
  * Otherwise two sketches that should represent overlapping sets will be disjoint
338
358
  * For instance, for signed 32-bit values call update(int32_t) method above,
339
359
  * which does widening conversion to int64_t, if compatibility with Java is expected
340
- * @param data pointer to the data
360
+ * @param key pointer to the data
341
361
  * @param length of the data in bytes
362
+ * @param value to update the sketch with
342
363
  */
343
364
  template<typename FwdUpdate>
344
365
  void update(const void* key, size_t length, FwdUpdate&& value);
@@ -355,7 +376,7 @@ public:
355
376
 
356
377
  /**
357
378
  * Converts this sketch to a compact sketch (ordered or unordered).
358
- * @param ordered optional flag to specify if ordered sketch should be produced
379
+ * @param ordered optional flag to specify if an ordered sketch should be produced
359
380
  * @return compact sketch
360
381
  */
361
382
  compact_tuple_sketch<Summary, Allocator> compact(bool ordered = true) const;
@@ -375,8 +396,10 @@ protected:
375
396
  virtual void print_specifics(std::ostringstream& os) const;
376
397
  };
377
398
 
378
- // compact sketch
379
-
399
+ /**
400
+ * Compact Tuple sketch.
401
+ * This is an immutable form of the Tuple sketch, the form that can be serialized and deserialized.
402
+ */
380
403
  template<
381
404
  typename Summary,
382
405
  typename Allocator = std::allocator<Summary>
@@ -406,13 +429,48 @@ public:
406
429
  // - as a result of a set operation
407
430
  // - by deserializing a previously serialized compact sketch
408
431
 
432
+ /**
433
+ * Copy constructor.
434
+ * Constructs a compact sketch from another sketch (either update or compact)
435
+ * @param other sketch to be copied
436
+ * @param ordered if true make the resulting sketch ordered
437
+ */
409
438
  compact_tuple_sketch(const Base& other, bool ordered);
410
- compact_tuple_sketch(const compact_tuple_sketch&) = default;
439
+
440
+ /**
441
+ * Copy constructor.
442
+ * @param other sketch to be copied
443
+ */
444
+ compact_tuple_sketch(const compact_tuple_sketch& other) = default;
445
+
446
+ /**
447
+ * Move constructor.
448
+ * @param other sketch to be moved
449
+ */
411
450
  compact_tuple_sketch(compact_tuple_sketch&&) noexcept;
451
+
412
452
  virtual ~compact_tuple_sketch() = default;
413
- compact_tuple_sketch& operator=(const compact_tuple_sketch&) = default;
414
- compact_tuple_sketch& operator=(compact_tuple_sketch&&) = default;
415
453
 
454
+ /**
455
+ * Copy assignment
456
+ * @param other sketch to be copied
457
+ * @return reference to this sketch
458
+ */
459
+ compact_tuple_sketch& operator=(const compact_tuple_sketch& other) = default;
460
+
461
+ /**
462
+ * Move assignment
463
+ * @param other sketch to be moved
464
+ * @return reference to this sketch
465
+ */
466
+ compact_tuple_sketch& operator=(compact_tuple_sketch&& other) = default;
467
+
468
+ /**
469
+ * Constructor from Theta sketch
470
+ * @param other Theta sketch to be constructed from
471
+ * @param summary Summary instance to be associated with each entry
472
+ * @param ordered if true make the resulting sketch ordered
473
+ */
416
474
  compact_tuple_sketch(const theta_sketch_alloc<AllocU64>& other, const Summary& summary, bool ordered = true);
417
475
 
418
476
  virtual Allocator get_allocator() const;
@@ -425,7 +483,7 @@ public:
425
483
  /**
426
484
  * This method serializes the sketch into a given stream in a binary form
427
485
  * @param os output stream
428
- * @param instance of a SerDe
486
+ * @param sd instance of a SerDe
429
487
  */
430
488
  template<typename SerDe = serde<Summary>>
431
489
  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
@@ -436,7 +494,7 @@ public:
436
494
  * It is a blank space of a given size.
437
495
  * This header is used in Datasketches PostgreSQL extension.
438
496
  * @param header_size_bytes space to reserve in front of the sketch
439
- * @param instance of a SerDe
497
+ * @param sd instance of a SerDe
440
498
  * @return serialized sketch as a vector of bytes
441
499
  */
442
500
  template<typename SerDe = serde<Summary>>
@@ -451,8 +509,8 @@ public:
451
509
  * This method deserializes a sketch from a given stream.
452
510
  * @param is input stream
453
511
  * @param seed the seed for the hash function that was used to create the sketch
454
- * @param instance of a SerDe
455
- * @param instance of an Allocator
512
+ * @param sd instance of a SerDe
513
+ * @param allocator instance of an Allocator
456
514
  * @return an instance of a sketch
457
515
  */
458
516
  template<typename SerDe = serde<Summary>>
@@ -464,17 +522,14 @@ public:
464
522
  * @param bytes pointer to the array of bytes
465
523
  * @param size the size of the array
466
524
  * @param seed the seed for the hash function that was used to create the sketch
467
- * @param instance of a SerDe
468
- * @param instance of an Allocator
525
+ * @param sd instance of a SerDe
526
+ * @param allocator instance of an Allocator
469
527
  * @return an instance of the sketch
470
528
  */
471
529
  template<typename SerDe = serde<Summary>>
472
530
  static compact_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED,
473
531
  const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
474
532
 
475
- // for internal use
476
- compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries);
477
-
478
533
  protected:
479
534
  bool is_empty_;
480
535
  bool is_ordered_;
@@ -520,10 +575,14 @@ protected:
520
575
 
521
576
  virtual void print_specifics(std::ostringstream& os) const;
522
577
 
523
- };
578
+ template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_union_base;
579
+ template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_intersection_base;
580
+ template<typename E, typename EK, typename CS, typename A> friend class theta_set_difference_base;
581
+ compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries);
524
582
 
525
- // builder
583
+ };
526
584
 
585
+ /// Tuple base builder
527
586
  template<typename Derived, typename Policy, typename Allocator>
528
587
  class tuple_base_builder: public theta_base_builder<Derived, Allocator> {
529
588
  public:
@@ -533,11 +592,15 @@ protected:
533
592
  Policy policy_;
534
593
  };
535
594
 
595
+ /// Update Tuple sketch builder
536
596
  template<typename S, typename U, typename P, typename A>
537
597
  class update_tuple_sketch<S, U, P, A>::builder: public tuple_base_builder<builder, P, A> {
538
598
  public:
539
599
  /**
600
+ * Constructor
540
601
  * Creates an instance of the builder with default parameters.
602
+ * @param policy user-defined way of creating and updating Summary
603
+ * @param allocator instance of an Allocator to pass to created sketches
541
604
  */
542
605
  builder(const P& policy = P(), const A& allocator = A());
543
606
 
@@ -27,15 +27,19 @@ namespace datasketches {
27
27
 
28
28
  // for types with defined + operation
29
29
  template<typename Summary>
30
- struct default_union_policy {
30
+ struct default_tuple_union_policy {
31
31
  void operator()(Summary& summary, const Summary& other) const {
32
32
  summary += other;
33
33
  }
34
34
  };
35
35
 
36
+ /**
37
+ * Tuple Union.
38
+ * Computes union of Tuple sketches. There is no constructor. Use builder instead.
39
+ */
36
40
  template<
37
41
  typename Summary,
38
- typename Policy = default_union_policy<Summary>,
42
+ typename Policy = default_tuple_union_policy<Summary>,
39
43
  typename Allocator = std::allocator<Summary>
40
44
  >
41
45
  class tuple_union {
@@ -50,15 +54,15 @@ public:
50
54
  // reformulate the external policy that operates on Summary
51
55
  // in terms of operations on Entry
52
56
  struct internal_policy {
53
- internal_policy(const Policy& policy): policy_(policy) {}
57
+ internal_policy(const Policy& external_policy): external_policy_(external_policy) {}
54
58
  void operator()(Entry& internal_entry, const Entry& incoming_entry) const {
55
- policy_(internal_entry.second, incoming_entry.second);
59
+ external_policy_(internal_entry.second, incoming_entry.second);
56
60
  }
57
61
  void operator()(Entry& internal_entry, Entry&& incoming_entry) const {
58
- policy_(internal_entry.second, std::move(incoming_entry.second));
62
+ external_policy_(internal_entry.second, std::move(incoming_entry.second));
59
63
  }
60
- const Policy& get_policy() const { return policy_; }
61
- Policy policy_;
64
+ const Policy& get_external_policy() const { return external_policy_; }
65
+ Policy external_policy_;
62
66
  };
63
67
 
64
68
  using State = theta_union_base<Entry, ExtractKey, internal_policy, Sketch, CompactSketch, AllocEntry>;
@@ -67,15 +71,15 @@ public:
67
71
  class builder;
68
72
 
69
73
  /**
70
- * This method is to update the union with a given sketch
74
+ * Update the union with a given sketch
71
75
  * @param sketch to update the union with
72
76
  */
73
77
  template<typename FwdSketch>
74
78
  void update(FwdSketch&& sketch);
75
79
 
76
80
  /**
77
- * This method produces a copy of the current state of the union as a compact sketch.
78
- * @param ordered optional flag to specify if ordered sketch should be produced
81
+ * Produces a copy of the current state of the union as a compact sketch.
82
+ * @param ordered optional flag to specify if an ordered sketch should be produced
79
83
  * @return the result of the union
80
84
  */
81
85
  CompactSketch get_result(bool ordered = true) const;
@@ -92,16 +96,20 @@ protected:
92
96
  tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
93
97
  };
94
98
 
99
+ /// Tuple union builder
95
100
  template<typename S, typename P, typename A>
96
101
  class tuple_union<S, P, A>::builder: public tuple_base_builder<builder, P, A> {
97
102
  public:
98
103
  /**
104
+ * Constructor.
99
105
  * Creates an instance of the builder with default parameters.
106
+ * @param policy user-defined way of combining an existing Summary with an incoming one during union
107
+ * @param allocator instance of an Allocator to pass to the created union
100
108
  */
101
109
  builder(const P& policy = P(), const A& allocator = A());
102
110
 
103
111
  /**
104
- * This is to create an instance of the union with predefined parameters.
112
+ * Create an instance of the union with predefined parameters.
105
113
  * @return an instance of the union
106
114
  */
107
115
  tuple_union build() const;
@@ -20,7 +20,6 @@ add_executable(tuple_test)
20
20
  target_link_libraries(tuple_test tuple common_test_lib)
21
21
 
22
22
  set_target_properties(tuple_test PROPERTIES
23
- CXX_STANDARD 11
24
23
  CXX_STANDARD_REQUIRED YES
25
24
  )
26
25
 
@@ -47,3 +46,19 @@ target_sources(tuple_test
47
46
  array_of_doubles_sketch_test.cpp
48
47
  engagement_test.cpp
49
48
  )
49
+
50
+ if (SERDE_COMPAT)
51
+ target_sources(tuple_test
52
+ PRIVATE
53
+ aod_sketch_deserialize_from_java_test.cpp
54
+ tuple_sketch_deserialize_from_java_test.cpp
55
+ )
56
+ endif()
57
+
58
+ if (GENERATE)
59
+ target_sources(tuple_test
60
+ PRIVATE
61
+ aod_sketch_serialize_for_java.cpp
62
+ tuple_sketch_serialize_for_java.cpp
63
+ )
64
+ endif()
@@ -0,0 +1,76 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+
23
+ #include "array_of_doubles_sketch.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ // assume the binary sketches for this test have been generated by datasketches-java code
28
+ // in the subdirectory called "java" in the root directory of this project
29
+ static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
30
+
31
+ TEST_CASE("aod sketch one value", "[serde_compat]") {
32
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
33
+ for (const unsigned n: n_arr) {
34
+ std::ifstream is;
35
+ is.exceptions(std::ios::failbit | std::ios::badbit);
36
+ is.open(testBinaryInputPath + "aod_1_n" + std::to_string(n) + "_java.sk", std::ios::binary);
37
+ const auto sketch = compact_array_of_doubles_sketch::deserialize(is);
38
+ REQUIRE(sketch.is_empty() == (n == 0));
39
+ REQUIRE(sketch.is_estimation_mode() == (n > 1000));
40
+ REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
41
+ REQUIRE(sketch.get_num_values() == 1);
42
+ for (const auto& entry: sketch) {
43
+ REQUIRE(entry.first < sketch.get_theta64());
44
+ }
45
+ }
46
+ }
47
+
48
+ TEST_CASE("aod sketch three values", "[serde_compat]") {
49
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
50
+ for (const unsigned n: n_arr) {
51
+ std::ifstream is;
52
+ is.exceptions(std::ios::failbit | std::ios::badbit);
53
+ is.open(testBinaryInputPath + "aod_3_n" + std::to_string(n) + "_java.sk", std::ios::binary);
54
+ const auto sketch = compact_array_of_doubles_sketch::deserialize(is);
55
+ REQUIRE(sketch.is_empty() == (n == 0));
56
+ REQUIRE(sketch.is_estimation_mode() == (n > 1000));
57
+ REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
58
+ REQUIRE(sketch.get_num_values() == 3);
59
+ for (const auto& entry: sketch) {
60
+ REQUIRE(entry.first < sketch.get_theta64());
61
+ REQUIRE(entry.second[0] == entry.second[1]);
62
+ REQUIRE(entry.second[0] == entry.second[2]);
63
+ }
64
+ }
65
+ }
66
+
67
+ TEST_CASE("aod sketch non-empty no entries", "[serde_compat]") {
68
+ std::ifstream is;
69
+ is.exceptions(std::ios::failbit | std::ios::badbit);
70
+ is.open(testBinaryInputPath + "aod_1_non_empty_no_entries_java.sk", std::ios::binary);
71
+ const auto sketch = compact_array_of_doubles_sketch::deserialize(is);
72
+ REQUIRE_FALSE(sketch.is_empty());
73
+ REQUIRE(sketch.get_num_retained() == 0);
74
+ }
75
+
76
+ } /* namespace datasketches */
@@ -0,0 +1,62 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+
23
+ #include "array_of_doubles_sketch.hpp"
24
+
25
+ namespace datasketches {
26
+
27
+ TEST_CASE("aod sketch generate one value", "[serialize_for_java]") {
28
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
29
+ for (const unsigned n: n_arr) {
30
+ auto sketch = update_array_of_doubles_sketch::builder().build();
31
+ for (unsigned i = 0; i < n; ++i) sketch.update(i, std::vector<double>(1, i));
32
+ REQUIRE(sketch.is_empty() == (n == 0));
33
+ REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
34
+ std::ofstream os("aod_1_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
35
+ sketch.compact().serialize(os);
36
+ }
37
+ }
38
+
39
+ TEST_CASE("aod sketch generate three values", "[serialize_for_java]") {
40
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
41
+ for (const unsigned n: n_arr) {
42
+ auto sketch = update_array_of_doubles_sketch::builder(3).build();
43
+ for (unsigned i = 0; i < n; ++i) sketch.update(i, std::vector<double>(3, i));
44
+ REQUIRE(sketch.is_empty() == (n == 0));
45
+ REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
46
+ std::ofstream os("aod_3_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
47
+ sketch.compact().serialize(os);
48
+ }
49
+ }
50
+
51
+ TEST_CASE("aod sketch generate non-empty no entries", "[serialize_for_java]") {
52
+ auto sketch = update_array_of_doubles_sketch::builder().set_p(0.01).build();
53
+ // here we rely on the fact that hash of 1 happens to be greater than 0.01 (when normalized)
54
+ // and therefore gets rejected
55
+ sketch.update(1, std::vector<double>({1}));
56
+ REQUIRE_FALSE(sketch.is_empty());
57
+ REQUIRE(sketch.get_num_retained() == 0);
58
+ std::ofstream os("aod_1_non_empty_no_entries_cpp.sk", std::ios::binary);
59
+ sketch.compact().serialize(os);
60
+ }
61
+
62
+ } /* namespace datasketches */