datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -26,6 +26,16 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
+ // forward declaration
30
+ template<typename A> class theta_union_alloc;
31
+
32
+ // alias with default allocator for convenience
33
+ using theta_union = theta_union_alloc<std::allocator<uint64_t>>;
34
+
35
+ /**
36
+ * Theta Union.
37
+ * Computes union of Theta sketches. There is no constructor. Use builder instead.
38
+ */
29
39
  template<typename Allocator = std::allocator<uint64_t>>
30
40
  class theta_union_alloc {
31
41
  public:
@@ -35,6 +45,7 @@ public:
35
45
  using CompactSketch = compact_theta_sketch_alloc<Allocator>;
36
46
  using resize_factor = theta_constants::resize_factor;
37
47
 
48
+ // there is no payload in Theta sketch entry
38
49
  struct nop_policy {
39
50
  void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
40
51
  unused(internal_entry);
@@ -47,22 +58,20 @@ public:
47
58
  class builder;
48
59
 
49
60
  /**
50
- * This method is to update the union with a given sketch
61
+ * Update the union with a given sketch
51
62
  * @param sketch to update the union with
52
63
  */
53
64
  template<typename FwdSketch>
54
65
  void update(FwdSketch&& sketch);
55
66
 
56
67
  /**
57
- * This method produces a copy of the current state of the union as a compact sketch.
58
- * @param ordered optional flag to specify if ordered sketch should be produced
68
+ * Produces a copy of the current state of the union as a compact sketch.
69
+ * @param ordered optional flag to specify if an ordered sketch should be produced
59
70
  * @return the result of the union
60
71
  */
61
72
  CompactSketch get_result(bool ordered = true) const;
62
73
 
63
- /**
64
- * Reset the union to the initial empty state
65
- */
74
+ /// Reset the union to the initial empty state
66
75
  void reset();
67
76
 
68
77
  private:
@@ -72,21 +81,19 @@ private:
72
81
  theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Allocator& allocator);
73
82
  };
74
83
 
84
+ /// Theta union builder
75
85
  template<typename A>
76
86
  class theta_union_alloc<A>::builder: public theta_base_builder<builder, A> {
77
87
  public:
78
88
  builder(const A& allocator = A());
79
89
 
80
90
  /**
81
- * This is to create an instance of the union with predefined parameters.
91
+ * Create an instance of the union with predefined parameters.
82
92
  * @return an instance of the union
83
93
  */
84
94
  theta_union_alloc<A> build() const;
85
95
  };
86
96
 
87
- // alias with default allocator for convenience
88
- using theta_union = theta_union_alloc<std::allocator<uint64_t>>;
89
-
90
97
  } /* namespace datasketches */
91
98
 
92
99
  #include "theta_union_impl.hpp"
@@ -42,7 +42,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
42
42
  if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
43
43
  table_.is_empty_ = false;
44
44
  union_theta_ = std::min(union_theta_, sketch.get_theta64());
45
- for (auto& entry: sketch) {
45
+ for (auto&& entry: sketch) {
46
46
  const uint64_t hash = EK()(entry);
47
47
  if (hash < union_theta_ && hash < table_.theta_) {
48
48
  auto result = table_.find(hash);
@@ -28,9 +28,9 @@ state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, nop_policy(), allocator)
28
28
  {}
29
29
 
30
30
  template<typename A>
31
- template<typename SS>
32
- void theta_union_alloc<A>::update(SS&& sketch) {
33
- state_.update(std::forward<SS>(sketch));
31
+ template<typename FwdSketch>
32
+ void theta_union_alloc<A>::update(FwdSketch&& sketch) {
33
+ state_.update(std::forward<FwdSketch>(sketch));
34
34
  }
35
35
 
36
36
  template<typename A>
@@ -91,13 +91,14 @@ struct theta_update_sketch_base {
91
91
  static void consolidate_non_empty(Entry* entries, size_t size, size_t num);
92
92
  };
93
93
 
94
- // builder
95
94
 
95
+ /// Theta base builder
96
96
  template<typename Derived, typename Allocator>
97
97
  class theta_base_builder {
98
98
  public:
99
99
  /**
100
100
  * Creates an instance of the builder with default parameters.
101
+ * @param allocator instance of an Allocator to pass to created sketches
101
102
  */
102
103
  theta_base_builder(const Allocator& allocator);
103
104
 
@@ -198,7 +199,8 @@ public:
198
199
  theta_iterator operator++(int);
199
200
  bool operator==(const theta_iterator& other) const;
200
201
  bool operator!=(const theta_iterator& other) const;
201
- Entry& operator*() const;
202
+ reference operator*() const;
203
+ pointer operator->() const;
202
204
 
203
205
  private:
204
206
  Entry* entries_;
@@ -221,6 +223,7 @@ public:
221
223
  bool operator==(const theta_const_iterator& other) const;
222
224
  bool operator!=(const theta_const_iterator& other) const;
223
225
  reference operator*() const;
226
+ pointer operator->() const;
224
227
 
225
228
  private:
226
229
  const Entry* entries_;
@@ -386,6 +386,11 @@ auto theta_iterator<Entry, ExtractKey>::operator*() const -> reference {
386
386
  return entries_[index_];
387
387
  }
388
388
 
389
+ template<typename Entry, typename ExtractKey>
390
+ auto theta_iterator<Entry, ExtractKey>::operator->() const -> pointer {
391
+ return entries_ + index_;
392
+ }
393
+
389
394
  // const iterator
390
395
 
391
396
  template<typename Entry, typename ExtractKey>
@@ -419,10 +424,15 @@ bool theta_const_iterator<Entry, ExtractKey>::operator==(const theta_const_itera
419
424
  }
420
425
 
421
426
  template<typename Entry, typename ExtractKey>
422
- auto theta_const_iterator<Entry, ExtractKey>::operator*() const -> const Entry& {
427
+ auto theta_const_iterator<Entry, ExtractKey>::operator*() const -> reference {
423
428
  return entries_[index_];
424
429
  }
425
430
 
431
+ template<typename Entry, typename ExtractKey>
432
+ auto theta_const_iterator<Entry, ExtractKey>::operator->() const -> pointer {
433
+ return entries_ + index_;
434
+ }
435
+
426
436
  } /* namespace datasketches */
427
437
 
428
438
  #endif
@@ -20,7 +20,6 @@ add_executable(theta_test)
20
20
  target_link_libraries(theta_test theta common_test_lib)
21
21
 
22
22
  set_target_properties(theta_test PROPERTIES
23
- CXX_STANDARD 11
24
23
  CXX_STANDARD_REQUIRED YES
25
24
  )
26
25
 
@@ -46,3 +45,17 @@ target_sources(theta_test
46
45
  theta_setop_test.cpp
47
46
  bit_packing_test.cpp
48
47
  )
48
+
49
+ if (SERDE_COMPAT)
50
+ target_sources(theta_test
51
+ PRIVATE
52
+ theta_sketch_deserialize_from_java_test.cpp
53
+ )
54
+ endif()
55
+
56
+ if (GENERATE)
57
+ target_sources(theta_test
58
+ PRIVATE
59
+ theta_sketch_serialize_for_java.cpp
60
+ )
61
+ endif()
@@ -0,0 +1,57 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <theta_sketch.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ // assume the binary sketches for this test have been generated by datasketches-java code
27
+ // in the subdirectory called "java" in the root directory of this project
28
+ static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
29
+
30
+ TEST_CASE("theta sketch", "[serde_compat]") {
31
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
32
+ for (const unsigned n: n_arr) {
33
+ std::ifstream is;
34
+ is.exceptions(std::ios::failbit | std::ios::badbit);
35
+ is.open(testBinaryInputPath + "theta_n" + std::to_string(n) + "_java.sk", std::ios::binary);
36
+ const auto sketch = compact_theta_sketch::deserialize(is);
37
+ REQUIRE(sketch.is_empty() == (n == 0));
38
+ REQUIRE(sketch.is_estimation_mode() == (n > 1000));
39
+ REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
40
+ for (const auto hash: sketch) {
41
+ REQUIRE(hash < sketch.get_theta64());
42
+ }
43
+ REQUIRE(sketch.is_ordered());
44
+ REQUIRE(std::is_sorted(sketch.begin(), sketch.end()));
45
+ }
46
+ }
47
+
48
+ TEST_CASE("theta sketch non-empty no entries", "[serde_compat]") {
49
+ std::ifstream is;
50
+ is.exceptions(std::ios::failbit | std::ios::badbit);
51
+ is.open(testBinaryInputPath + "theta_non_empty_no_entries_java.sk", std::ios::binary);
52
+ const auto sketch = compact_theta_sketch::deserialize(is);
53
+ REQUIRE_FALSE(sketch.is_empty());
54
+ REQUIRE(sketch.get_num_retained() == 0);
55
+ }
56
+
57
+ } /* namespace datasketches */
@@ -0,0 +1,61 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <theta_sketch.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ TEST_CASE("theta sketch generate", "[serialize_for_java]") {
27
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
28
+ for (const unsigned n: n_arr) {
29
+ auto sketch = update_theta_sketch::builder().build();
30
+ for (unsigned i = 0; i < n; ++i) sketch.update(i);
31
+ REQUIRE(sketch.is_empty() == (n == 0));
32
+ REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
33
+ std::ofstream os("theta_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
34
+ sketch.compact().serialize(os);
35
+ }
36
+ }
37
+
38
+ TEST_CASE("theta sketch generate compressed", "[serialize_for_java]") {
39
+ const unsigned n_arr[] = {10, 100, 1000, 10000, 100000, 1000000};
40
+ for (const unsigned n: n_arr) {
41
+ auto sketch = update_theta_sketch::builder().build();
42
+ for (unsigned i = 0; i < n; ++i) sketch.update(i);
43
+ REQUIRE_FALSE(sketch.is_empty());
44
+ REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
45
+ std::ofstream os("theta_compressed_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
46
+ sketch.compact().serialize_compressed(os); // emit the compressed format the test and file name promise; plain serialize() would duplicate the "theta_n*" files
47
+ }
48
+ }
49
+
50
+ TEST_CASE("theta sketch generate non-empty no entries", "[serialize_for_java]") {
51
+ auto sketch = update_theta_sketch::builder().set_p(0.01).build();
52
+ // here we rely on the fact that hash of 1 happens to be greater than 0.01 (when normalized)
53
+ // and therefore gets rejected
54
+ sketch.update(1);
55
+ REQUIRE_FALSE(sketch.is_empty());
56
+ REQUIRE(sketch.get_num_retained() == 0);
57
+ std::ofstream os("theta_non_empty_no_entries_cpp.sk", std::ios::binary);
58
+ sketch.compact().serialize(os);
59
+ }
60
+
61
+ } /* namespace datasketches */
@@ -167,20 +167,6 @@ TEST_CASE("theta sketch: estimation", "[theta_sketch]") {
167
167
  REQUIRE(compact_sketch.get_upper_bound(1) > n);
168
168
  }
169
169
 
170
- TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]") {
171
- std::ifstream is;
172
- is.exceptions(std::ios::failbit | std::ios::badbit);
173
- is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary);
174
- auto sketch = compact_theta_sketch::deserialize(is);
175
- REQUIRE(sketch.is_empty());
176
- REQUIRE_FALSE(sketch.is_estimation_mode());
177
- REQUIRE(sketch.get_num_retained() == 0);
178
- REQUIRE(sketch.get_theta() == 1.0);
179
- REQUIRE(sketch.get_estimate() == 0.0);
180
- REQUIRE(sketch.get_lower_bound(1) == 0.0);
181
- REQUIRE(sketch.get_upper_bound(1) == 0.0);
182
- }
183
-
184
170
  TEST_CASE("theta sketch: deserialize compact v1 empty from java", "[theta_sketch]") {
185
171
  std::ifstream is;
186
172
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -209,88 +195,6 @@ TEST_CASE("theta sketch: deserialize compact v2 empty from java", "[theta_sketch
209
195
  REQUIRE(sketch.get_upper_bound(1) == 0.0);
210
196
  }
211
197
 
212
- TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
213
- std::ifstream is;
214
- is.exceptions(std::ios::failbit | std::ios::badbit);
215
- is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary);
216
- auto sketch = compact_theta_sketch::deserialize(is);
217
- REQUIRE_FALSE(sketch.is_empty());
218
- REQUIRE_FALSE(sketch.is_estimation_mode());
219
- REQUIRE(sketch.get_num_retained() == 1);
220
- REQUIRE(sketch.get_theta() == 1.0);
221
- REQUIRE(sketch.get_estimate() == 1.0);
222
- REQUIRE(sketch.get_lower_bound(1) == 1.0);
223
- REQUIRE(sketch.get_upper_bound(1) == 1.0);
224
- }
225
-
226
- TEST_CASE("theta sketch: deserialize compact exact from java", "[theta_sketch]") {
227
- std::ifstream is;
228
- is.exceptions(std::ios::failbit | std::ios::badbit);
229
- is.open(inputPath + "theta_compact_exact_from_java.sk", std::ios::binary);
230
- auto sketch = compact_theta_sketch::deserialize(is);
231
- REQUIRE_FALSE(sketch.is_empty());
232
- REQUIRE_FALSE(sketch.is_estimation_mode());
233
- REQUIRE(sketch.is_ordered());
234
- REQUIRE(sketch.get_num_retained() == 100);
235
-
236
- // the same construction process in Java must have produced exactly the same sketch
237
- auto update_sketch = update_theta_sketch::builder().build();
238
- const int n = 100;
239
- for (int i = 0; i < n; i++) update_sketch.update(i);
240
- REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
241
- REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
242
- REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
243
- REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
244
- REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
245
- REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
246
- REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
247
- REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
248
- REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
249
- compact_theta_sketch compact_sketch = update_sketch.compact();
250
- // the sketches are ordered, so the iteration sequence must match exactly
251
- auto iter = sketch.begin();
252
- for (const auto& key: compact_sketch) {
253
- REQUIRE(*iter == key);
254
- ++iter;
255
- }
256
- }
257
-
258
- TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") {
259
- std::ifstream is;
260
- is.exceptions(std::ios::failbit | std::ios::badbit);
261
- is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary);
262
- auto sketch = compact_theta_sketch::deserialize(is);
263
- REQUIRE_FALSE(sketch.is_empty());
264
- REQUIRE(sketch.is_estimation_mode());
265
- REQUIRE(sketch.is_ordered());
266
- REQUIRE(sketch.get_num_retained() == 4342);
267
- REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
268
- REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
269
- REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
270
- REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
271
-
272
- // the same construction process in Java must have produced exactly the same sketch
273
- update_theta_sketch update_sketch = update_theta_sketch::builder().build();
274
- const int n = 8192;
275
- for (int i = 0; i < n; i++) update_sketch.update(i);
276
- REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
277
- REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
278
- REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
279
- REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
280
- REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
281
- REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
282
- REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
283
- REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
284
- REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
285
- compact_theta_sketch compact_sketch = update_sketch.compact();
286
- // the sketches are ordered, so the iteration sequence must match exactly
287
- auto iter = sketch.begin();
288
- for (const auto& key: compact_sketch) {
289
- REQUIRE(*iter == key);
290
- ++iter;
291
- }
292
- }
293
-
294
198
  TEST_CASE("theta sketch: deserialize compact v1 estimation from java", "[theta_sketch]") {
295
199
  std::ifstream is;
296
200
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -473,30 +377,6 @@ TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sk
473
377
  REQUIRE_THROWS_AS(wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size(), 0), std::invalid_argument);
474
378
  }
475
379
 
476
- TEST_CASE("theta sketch: wrap compact empty from java", "[theta_sketch]") {
477
- std::ifstream is;
478
- is.exceptions(std::ios::failbit | std::ios::badbit);
479
- is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary | std::ios::ate);
480
-
481
- std::vector<uint8_t> buf;
482
- if(is) {
483
- auto size = is.tellg();
484
- buf.reserve(size);
485
- buf.assign(size, 0);
486
- is.seekg(0, std::ios_base::beg);
487
- is.read((char*)(buf.data()), buf.size());
488
- }
489
-
490
- auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
491
- REQUIRE(sketch.is_empty());
492
- REQUIRE_FALSE(sketch.is_estimation_mode());
493
- REQUIRE(sketch.get_num_retained() == 0);
494
- REQUIRE(sketch.get_theta() == 1.0);
495
- REQUIRE(sketch.get_estimate() == 0.0);
496
- REQUIRE(sketch.get_lower_bound(1) == 0.0);
497
- REQUIRE(sketch.get_upper_bound(1) == 0.0);
498
- }
499
-
500
380
  TEST_CASE("theta sketch: wrap compact v1 empty from java", "[theta_sketch]") {
501
381
  std::ifstream is;
502
382
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -545,74 +425,6 @@ TEST_CASE("theta sketch: wrap compact v2 empty from java", "[theta_sketch]") {
545
425
  REQUIRE(sketch.get_upper_bound(1) == 0.0);
546
426
  }
547
427
 
548
- TEST_CASE("theta sketch: wrap single item from java", "[theta_sketch]") {
549
- std::ifstream is;
550
- is.exceptions(std::ios::failbit | std::ios::badbit);
551
- is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary | std::ios::ate);
552
- std::vector<uint8_t> buf;
553
- if(is) {
554
- auto size = is.tellg();
555
- buf.reserve(size);
556
- buf.assign(size, 0);
557
- is.seekg(0, std::ios_base::beg);
558
- is.read((char*)(buf.data()), buf.size());
559
- }
560
-
561
- auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
562
- REQUIRE_FALSE(sketch.is_empty());
563
- REQUIRE_FALSE(sketch.is_estimation_mode());
564
- REQUIRE(sketch.get_num_retained() == 1);
565
- REQUIRE(sketch.get_theta() == 1.0);
566
- REQUIRE(sketch.get_estimate() == 1.0);
567
- REQUIRE(sketch.get_lower_bound(1) == 1.0);
568
- REQUIRE(sketch.get_upper_bound(1) == 1.0);
569
- }
570
-
571
- TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
572
- std::ifstream is;
573
- is.exceptions(std::ios::failbit | std::ios::badbit);
574
- is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary | std::ios::ate);
575
- std::vector<uint8_t> buf;
576
- if(is) {
577
- auto size = is.tellg();
578
- buf.reserve(size);
579
- buf.assign(size, 0);
580
- is.seekg(0, std::ios_base::beg);
581
- is.read((char*)(buf.data()), buf.size());
582
- }
583
-
584
- auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
585
- REQUIRE_FALSE(sketch.is_empty());
586
- REQUIRE(sketch.is_estimation_mode());
587
- REQUIRE(sketch.is_ordered());
588
- REQUIRE(sketch.get_num_retained() == 4342);
589
- REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
590
- REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
591
- REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
592
- REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
593
-
594
- // the same construction process in Java must have produced exactly the same sketch
595
- update_theta_sketch update_sketch = update_theta_sketch::builder().build();
596
- const int n = 8192;
597
- for (int i = 0; i < n; i++) update_sketch.update(i);
598
- REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
599
- REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
600
- REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
601
- REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
602
- REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
603
- REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
604
- REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
605
- REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
606
- REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
607
- compact_theta_sketch compact_sketch = update_sketch.compact();
608
- // the sketches are ordered, so the iteration sequence must match exactly
609
- auto iter = sketch.begin();
610
- for (const auto key: compact_sketch) {
611
- REQUIRE(*iter == key);
612
- ++iter;
613
- }
614
- }
615
-
616
428
  TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]") {
617
429
  std::ifstream is;
618
430
  is.exceptions(std::ios::failbit | std::ios::badbit);
@@ -47,11 +47,12 @@ install(FILES
47
47
  include/tuple_a_not_b_impl.hpp
48
48
  include/tuple_jaccard_similarity.hpp
49
49
  include/array_of_doubles_sketch.hpp
50
- include/array_of_doubles_sketch_impl.hpp
51
- include/array_of_doubles_union.hpp
52
- include/array_of_doubles_union_impl.hpp
53
- include/array_of_doubles_intersection.hpp
54
- include/array_of_doubles_intersection_impl.hpp
55
- include/array_of_doubles_a_not_b.hpp
56
- include/array_of_doubles_a_not_b_impl.hpp
50
+ include/array_tuple_sketch.hpp
51
+ include/array_tuple_sketch_impl.hpp
52
+ include/array_tuple_union.hpp
53
+ include/array_tuple_union_impl.hpp
54
+ include/array_tuple_intersection.hpp
55
+ include/array_tuple_intersection_impl.hpp
56
+ include/array_tuple_a_not_b.hpp
57
+ include/array_tuple_a_not_b_impl.hpp
57
58
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")