datasketches 0.3.2 → 0.4.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -23,10 +23,8 @@
23
23
  #include <array>
24
24
 
25
25
  #include <catch2/catch.hpp>
26
- #include <array_of_doubles_sketch.hpp>
27
- #include <array_of_doubles_union.hpp>
28
- #include <array_of_doubles_intersection.hpp>
29
- #include <array_of_doubles_a_not_b.hpp>
26
+
27
+ #include "array_of_doubles_sketch.hpp"
30
28
 
31
29
  namespace datasketches {
32
30
 
@@ -47,130 +45,6 @@ TEST_CASE("aod sketch: reset", "[tuple_sketch]") {
47
45
  REQUIRE(update_sketch.get_num_retained() == 0);
48
46
  }
49
47
 
50
- TEST_CASE("aod sketch: serialization compatibility with java - empty", "[tuple_sketch]") {
51
- auto update_sketch = update_array_of_doubles_sketch::builder().build();
52
- REQUIRE(update_sketch.is_empty());
53
- REQUIRE(update_sketch.get_num_retained() == 0);
54
- auto compact_sketch = update_sketch.compact();
55
-
56
- // read binary sketch from Java
57
- std::ifstream is;
58
- is.exceptions(std::ios::failbit | std::ios::badbit);
59
- is.open(inputPath + "aod_1_compact_empty_from_java.sk", std::ios::binary);
60
- auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
61
- REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
62
- REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
63
- REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
64
- REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
65
- REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
66
- }
67
-
68
- TEST_CASE("aod sketch: serialization compatibility with java - empty configured for three values", "[tuple_sketch]") {
69
- auto update_sketch = update_array_of_doubles_sketch::builder(3).build();
70
- REQUIRE(update_sketch.is_empty());
71
- REQUIRE(update_sketch.get_num_retained() == 0);
72
- REQUIRE(update_sketch.get_num_values() == 3);
73
- auto compact_sketch = update_sketch.compact();
74
-
75
- // read binary sketch from Java
76
- std::ifstream is;
77
- is.exceptions(std::ios::failbit | std::ios::badbit);
78
- is.open(inputPath + "aod_3_compact_empty_from_java.sk", std::ios::binary);
79
- auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
80
- REQUIRE(compact_sketch.get_num_values() == compact_sketch_from_java.get_num_values());
81
- REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
82
- REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
83
- REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
84
- REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
85
- REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
86
- }
87
-
88
- TEST_CASE("aod sketch: serialization compatibility with java - non-empty no entries", "[tuple_sketch]") {
89
- auto update_sketch = update_array_of_doubles_sketch::builder().set_p(0.01f).build();
90
- std::vector<double> a = {1};
91
- update_sketch.update(1, a);
92
- REQUIRE_FALSE(update_sketch.is_empty());
93
- REQUIRE(update_sketch.get_num_retained() == 0);
94
- auto compact_sketch = update_sketch.compact();
95
-
96
- // read binary sketch from Java
97
- std::ifstream is;
98
- is.exceptions(std::ios::failbit | std::ios::badbit);
99
- is.open(inputPath + "aod_1_compact_non_empty_no_entries_from_java.sk", std::ios::binary);
100
- auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
101
- REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
102
- REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
103
- REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
104
- REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
105
- REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
106
- }
107
-
108
- TEST_CASE("aod sketch: serialization compatibility with java - estimation mode", "[tuple_sketch]") {
109
- auto update_sketch = update_array_of_doubles_sketch::builder().build();
110
- std::vector<double> a = {1};
111
- for (int i = 0; i < 8192; ++i) update_sketch.update(i, a);
112
- auto compact_sketch = update_sketch.compact();
113
-
114
- // read binary sketch from Java
115
- std::ifstream is;
116
- is.exceptions(std::ios::failbit | std::ios::badbit);
117
- is.open(inputPath + "aod_1_compact_estimation_from_java.sk", std::ios::binary);
118
- auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
119
- REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
120
- REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
121
- REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
122
- REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
123
- REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
124
- REQUIRE(compact_sketch.get_lower_bound(2) == Approx(compact_sketch_from_java.get_lower_bound(2)).margin(1e-10));
125
- REQUIRE(compact_sketch.get_upper_bound(2) == Approx(compact_sketch_from_java.get_upper_bound(2)).margin(1e-10));
126
- REQUIRE(compact_sketch.get_lower_bound(3) == Approx(compact_sketch_from_java.get_lower_bound(3)).margin(1e-10));
127
- REQUIRE(compact_sketch.get_upper_bound(3) == Approx(compact_sketch_from_java.get_upper_bound(3)).margin(1e-10));
128
-
129
- // sketch from Java is not ordered
130
- // transform it to ordered so that iteration sequence would match exactly
131
- compact_array_of_doubles_sketch ordered_sketch_from_java(compact_sketch_from_java, true);
132
- auto it = ordered_sketch_from_java.begin();
133
- for (const auto& entry: compact_sketch) {
134
- REQUIRE(entry == *it);
135
- ++it;
136
- }
137
- }
138
-
139
- TEST_CASE("aod sketch: serialization compatibility with java - exact mode with two values", "[tuple_sketch]") {
140
- auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
141
- std::vector<double> a = {1, 2};
142
- for (int i = 0; i < 1000; ++i) update_sketch.update(i, a.data()); // pass vector as pointer
143
- auto compact_sketch = update_sketch.compact();
144
- REQUIRE_FALSE(compact_sketch.is_estimation_mode());
145
-
146
- // read binary sketch from Java
147
- std::ifstream is;
148
- is.exceptions(std::ios::failbit | std::ios::badbit);
149
- is.open(inputPath + "aod_2_compact_exact_from_java.sk", std::ios::binary);
150
- auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
151
- REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
152
- REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
153
- REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
154
- REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
155
- REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
156
- REQUIRE(compact_sketch.get_lower_bound(2) == Approx(compact_sketch_from_java.get_lower_bound(2)).margin(1e-10));
157
- REQUIRE(compact_sketch.get_upper_bound(2) == Approx(compact_sketch_from_java.get_upper_bound(2)).margin(1e-10));
158
- REQUIRE(compact_sketch.get_lower_bound(3) == Approx(compact_sketch_from_java.get_lower_bound(3)).margin(1e-10));
159
- REQUIRE(compact_sketch.get_upper_bound(3) == Approx(compact_sketch_from_java.get_upper_bound(3)).margin(1e-10));
160
-
161
- // sketch from Java is not ordered
162
- // transform it to ordered so that iteration sequence would match exactly
163
- compact_array_of_doubles_sketch ordered_sketch_from_java(compact_sketch_from_java, true);
164
- auto it = ordered_sketch_from_java.begin();
165
- for (const auto& entry: compact_sketch) {
166
- REQUIRE(entry.first == (*it).first);
167
- REQUIRE(entry.second.size() == 2);
168
- REQUIRE(entry.second[0] == (*it).second[0]);
169
- REQUIRE(entry.second[1] == (*it).second[1]);
170
- ++it;
171
- }
172
- }
173
-
174
48
  TEST_CASE("aod sketch: stream serialize deserialize - estimation mode", "[tuple_sketch]") {
175
49
  auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
176
50
  std::vector<double> a = {1, 2};
@@ -290,7 +164,9 @@ TEST_CASE("aod intersection: half overlap", "[tuple_sketch]") {
290
164
  auto update_sketch2 = update_array_of_doubles_sketch::builder().build();
291
165
  for (int i = 500; i < 1500; ++i) update_sketch2.update(i, a);
292
166
 
293
- array_of_doubles_intersection<array_of_doubles_union_policy> intersection;
167
+ // there is no default policy for intersection
168
+ // let's combine values the same way as in union for testing
169
+ array_of_doubles_intersection<default_array_of_doubles_union_policy> intersection;
294
170
  intersection.update(update_sketch1);
295
171
  intersection.update(update_sketch2);
296
172
  auto result = intersection.get_result();
@@ -42,11 +42,11 @@ class always_one_policy {
42
42
  public:
43
43
  always_one_policy(): initial_value(1) {}
44
44
  T create() const { return 1; }
45
- void update(T&, const T&) const { }
45
+ void update(T&, const T&) const {}
46
46
  private:
47
47
  T initial_value;
48
48
  };
49
- using always_one_tuple_sketch = datasketches::update_tuple_sketch<int, int, always_one_policy<int>> ;
49
+ using always_one_tuple_sketch = datasketches::update_tuple_sketch<int, int, always_one_policy<int>>;
50
50
 
51
51
  template<typename T>
52
52
  class update_sum_value_policy {
@@ -66,20 +66,20 @@ struct union_sum_value_policy {
66
66
  }
67
67
  };
68
68
 
69
- using sum_union_tuple_sketch = datasketches::tuple_union<int, union_sum_value_policy<int>> ;
69
+ using sum_union_tuple_sketch = datasketches::tuple_union<int, union_sum_value_policy<int>>;
70
70
 
71
71
 
72
- class EngagementTest{
72
+ class EngagementTest {
73
73
  public:
74
- int num_std_dev = 2 ;
75
- void test_always_one_update(){
74
+ uint8_t num_std_dev = 2;
75
+ void test_always_one_update() {
76
76
  /*
77
77
  * Tests that updates into an update_tuple_sketch sketch only keeps a 1 in the column for stored values.
78
78
  */
79
- int lgK = 8 ;
80
- std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketch_array ;
79
+ uint8_t lgK = 8;
80
+ std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketch_array;
81
81
 
82
- auto always_one_sketch = always_one_tuple_sketch::builder(always_one_policy<int>()).set_lg_k(lgK).build() ;
82
+ auto always_one_sketch = always_one_tuple_sketch::builder(always_one_policy<int>()).set_lg_k(lgK).build();
83
83
 
84
84
  always_one_sketch.update(1, 1);
85
85
  always_one_sketch.update(1, 2);
@@ -97,12 +97,12 @@ public:
97
97
  REQUIRE(sum == 3); // we only keep 1 for every stored key.
98
98
  }
99
99
 
100
- void test_sum_update_policy(){
100
+ void test_sum_update_policy() {
101
101
  /*
102
102
  * Tests that updates into an sum_update_tuple_sketch sum the stored values on updates.
103
103
  */
104
- int lgK = 8 ;
105
- auto sum_sketch = sum_update_tuple_sketch::builder().set_lg_k(lgK).build() ;
104
+ uint8_t lgK = 8;
105
+ auto sum_sketch = sum_update_tuple_sketch::builder().set_lg_k(lgK).build();
106
106
 
107
107
  sum_sketch.update(1, 1);
108
108
  sum_sketch.update(1, 2);
@@ -124,8 +124,8 @@ public:
124
124
  * Tests that updates into two sketches of sum_update_tuple_sketch flavour, which have been unioned,
125
125
  * cause the stored values of two of the same keys to be summed.
126
126
  */
127
- auto sketch1 = sum_update_tuple_sketch::builder().build() ;
128
- auto sketch2 = sum_update_tuple_sketch::builder().build() ;
127
+ auto sketch1 = sum_update_tuple_sketch::builder().build();
128
+ auto sketch2 = sum_update_tuple_sketch::builder().build();
129
129
 
130
130
  sketch1.update(1, 1);
131
131
  sketch1.update(2, 1);
@@ -135,10 +135,10 @@ public:
135
135
  sketch2.update(2, 1);
136
136
  sketch2.update(3, 7);
137
137
 
138
- auto union_sketch = sum_union_tuple_sketch::builder().build() ;
139
- union_sketch.update(sketch1) ;
140
- union_sketch.update(sketch2) ;
141
- auto union_result = union_sketch.get_result() ;
138
+ auto union_sketch = sum_union_tuple_sketch::builder().build();
139
+ union_sketch.update(sketch1);
140
+ union_sketch.update(sketch2);
141
+ auto union_result = union_sketch.get_result();
142
142
 
143
143
  int num_retained = 0;
144
144
  int sum = 0;
@@ -150,95 +150,95 @@ public:
150
150
  REQUIRE(sum == 15); // 1:(1+2) + 2:(1+1) + 3:(3+7) = 15
151
151
  }
152
152
 
153
- void compute_engagement_histogram(){
153
+ void compute_engagement_histogram() {
154
154
  /*
155
155
  * Returns the estimated histogram from the synthetic data.
156
156
  * On inspection one can verify this agrees with the
157
157
  * https://github.com/apache/datasketches-java/blob/master/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java
158
158
  */
159
- int lgK = 8 ;
160
- const int days = 30 ;
161
- int v = 0 ;
159
+ uint8_t lgK = 8;
160
+ const int days = 30;
161
+ int v = 0;
162
162
  std::set<int> set_array[days];
163
- std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketch_array ;
163
+ std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketch_array;
164
164
 
165
165
 
166
- for(int i=0; i<days ; i++){
167
- auto builder = always_one_tuple_sketch::builder(always_one_policy<int>()) ;
168
- builder.set_lg_k(lgK) ;
169
- auto sketch = builder.build() ;
166
+ for (int i = 0; i < days; ++i) {
167
+ auto builder = always_one_tuple_sketch::builder(always_one_policy<int>());
168
+ builder.set_lg_k(lgK);
169
+ auto sketch = builder.build();
170
170
  sketch_array.push_back(sketch);
171
171
  }
172
- REQUIRE(sketch_array.size() == days) ;
172
+ REQUIRE(sketch_array.size() == days);
173
173
 
174
- for(int i=0; i<=days; i++){
175
- int32_t num_ids = get_num_ids(days, i) ;
176
- int32_t num_days = get_num_days(days, i) ;
174
+ for (int i = 0; i <= days; ++i) {
175
+ int32_t num_ids = get_num_ids(days, i);
176
+ int32_t num_days = get_num_days(days, i);
177
177
 
178
- int my_v = v++ ;
179
- for(int d=0 ; d<num_days; d++){
180
- for(int id = 0; id < num_ids; id++){
181
- set_array[d].insert(my_v + id) ;
182
- sketch_array[d].update(my_v + id, 1) ;
178
+ int my_v = v++;
179
+ for (int d = 0; d < num_days; ++d) {
180
+ for (int id = 0; id < num_ids; ++id) {
181
+ set_array[d].insert(my_v + id);
182
+ sketch_array[d].update(my_v + id, 1);
183
183
  }
184
184
  }
185
- v += num_ids ;
185
+ v += num_ids;
186
186
  }
187
- union_ops(lgK, sketch_array) ;
187
+ union_ops(lgK, sketch_array);
188
188
  }
189
189
  private:
190
- int32_t get_num_ids(int total_days, int index){
190
+ int32_t get_num_ids(int total_days, int index) {
191
191
  /*
192
192
  * Generates power law distributed synthetic data
193
193
  */
194
- double d = total_days ;
195
- double i = index ;
196
- return int(round(exp(i * log(d) / d))) ;
194
+ double d = total_days;
195
+ double i = index;
196
+ return int(round(exp(i * log(d) / d)));
197
197
  }
198
198
 
199
- int32_t get_num_days(int total_days, int index){
200
- double d = total_days ;
201
- double i = index ;
202
- return int(round(exp( (d-i) * log(d) / d ))) ;
199
+ int32_t get_num_days(int total_days, int index) {
200
+ double d = total_days;
201
+ double i = index;
202
+ return int(round(exp((d-i) * log(d) / d )));
203
203
  }
204
204
 
205
- int32_t round_double_to_int(double x){
206
- return int(std::round(x)) ;
205
+ int32_t round_double_to_int(double x) {
206
+ return int(std::round(x));
207
207
  }
208
208
 
209
- void union_ops(int lgk, std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketches){
210
- int num_sketches = sketches.size() ;
211
- auto u = sum_union_tuple_sketch::builder().set_lg_k(lgk).build() ;
209
+ void union_ops(uint8_t lgk, std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketches) {
210
+ auto num_sketches = sketches.size();
211
+ auto u = sum_union_tuple_sketch::builder().set_lg_k(lgk).build();
212
212
 
213
- for(auto sk:sketches){
214
- u.update(sk) ;
213
+ for (auto sk: sketches) {
214
+ u.update(sk);
215
215
  }
216
- auto union_result = u.get_result() ;
217
- std::vector<uint64_t> num_days_arr(num_sketches+1) ;
216
+ auto union_result = u.get_result();
217
+ std::vector<uint64_t> num_days_arr(num_sketches+1);
218
218
 
219
219
  for (const auto& entry: union_result) {
220
- int num_days_visited = entry.second ;
221
- num_days_arr[num_days_visited]++;
220
+ int num_days_visited = entry.second;
221
+ ++num_days_arr[num_days_visited];
222
222
  }
223
223
 
224
- int sum_visits = 0;
224
+ uint64_t sum_visits = 0;
225
225
  double theta = union_result.get_theta();
226
- std::cout <<"\t\tEngagement Histogram.\t\t\t\n" ;
227
- std::cout << "Number of Unique Visitors by Number of Days Visited" << std::endl ;
228
- std::cout << "---------------------------------------------------" << std::endl ;
226
+ std::cout <<"\t\tEngagement Histogram.\t\t\t\n";
227
+ std::cout << "Number of Unique Visitors by Number of Days Visited" << std::endl;
228
+ std::cout << "---------------------------------------------------" << std::endl;
229
229
 
230
230
  std::cout << std::setw(12) << "Days Visited"
231
231
  << std::setw(12) << "Estimate"
232
232
  << std::setw(12) << "LB"
233
233
  << std::setw(12) << "UB"
234
- << std:: endl ;
234
+ << std:: endl;
235
235
 
236
- for (uint64_t i = 0; i < num_days_arr.size(); i++) {
237
- int visitors_at_days_visited = num_days_arr[i] ;
238
- if(visitors_at_days_visited == 0){ continue; }
239
- sum_visits += visitors_at_days_visited * i ;
236
+ for (size_t i = 0; i < num_days_arr.size(); ++i) {
237
+ auto visitors_at_days_visited = num_days_arr[i];
238
+ if (visitors_at_days_visited == 0) continue;
239
+ sum_visits += visitors_at_days_visited * i;
240
240
 
241
- double est_visitors_at_days_visited = visitors_at_days_visited / theta ;
241
+ double est_visitors_at_days_visited = visitors_at_days_visited / theta;
242
242
  double lower_bound_at_days_visited = union_result.get_lower_bound(num_std_dev, visitors_at_days_visited);
243
243
  double upper_bound_at_days_visited = union_result.get_upper_bound(num_std_dev, visitors_at_days_visited);
244
244
 
@@ -246,27 +246,25 @@ private:
246
246
  << std::setw(12) << est_visitors_at_days_visited
247
247
  << std::setw(12) << lower_bound_at_days_visited
248
248
  << std::setw(12) << upper_bound_at_days_visited
249
- << std:: endl ;
250
-
251
- }
252
- std::cout << std::endl << std::endl ;
249
+ << std:: endl;
250
+ }
251
+ std::cout << std::endl << std::endl;
253
252
  std::cout << std::setw(12) << "Totals"
254
253
  << std::setw(12) << "Estimate"
255
254
  << std::setw(12) << "LB"
256
255
  << std::setw(12) << "UB"
257
- << std:: endl ;
258
- std::cout << "---------------------------------------------------" << std::endl ;
259
-
260
- const double total_visitors = union_result.get_estimate() ;
261
- const double lb_visitors = union_result.get_lower_bound(num_std_dev) ;
262
- const double ub_visitors = union_result.get_upper_bound(num_std_dev) ;
256
+ << std:: endl;
257
+ std::cout << "---------------------------------------------------" << std::endl;
263
258
 
259
+ const double total_visitors = union_result.get_estimate();
260
+ const double lb_visitors = union_result.get_lower_bound(num_std_dev);
261
+ const double ub_visitors = union_result.get_upper_bound(num_std_dev);
264
262
 
265
263
  std::cout << std::setw(12) << "Visitors"
266
264
  << std::setw(12) << total_visitors
267
265
  << std::setw(12) << lb_visitors
268
266
  << std::setw(12) << ub_visitors
269
- << std:: endl ;
267
+ << std:: endl;
270
268
 
271
269
  // The total number of visits, however, is a scaled metric and takes advantage of the fact that
272
270
  // the retained entries in the sketch is a uniform random sample of all unique visitors, and
@@ -275,25 +273,23 @@ private:
275
273
  const double lb_visits = est_visits * lb_visitors / total_visitors;
276
274
  const double ub_visits = est_visits * ub_visitors / total_visitors;
277
275
 
278
-
279
276
  std::cout << std::setw(12) << "Visits"
280
277
  << std::setw(12) << est_visits
281
278
  << std::setw(12) << lb_visits
282
279
  << std::setw(12) << ub_visits
283
- << std:: endl ;
280
+ << std:: endl;
284
281
  }
285
282
 
286
283
  };
287
284
 
288
285
  namespace datasketches {
289
286
 
290
- TEST_CASE("engagement", "[engagement]") {
291
- EngagementTest E ;
292
- E.test_always_one_update() ;
293
- E.test_sum_update_policy() ;
294
- E.test_sum_union_policy() ;
295
- E.compute_engagement_histogram() ;
296
- }
297
-
287
+ TEST_CASE("engagement", "[engagement]") {
288
+ EngagementTest E;
289
+ E.test_always_one_update();
290
+ E.test_sum_update_policy();
291
+ E.test_sum_union_policy();
292
+ E.compute_engagement_histogram();
293
+ }
298
294
 
299
- } /* namespace datasketches */
295
+ } /* namespace datasketches */
@@ -25,7 +25,9 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
- using tuple_jaccard_similarity_float = tuple_jaccard_similarity<float, default_union_policy<float>>;
28
+ // there is no default policy for intersection
29
+ // let's combine values the same way as in union for testing
30
+ using tuple_jaccard_similarity_float = tuple_jaccard_similarity<float, default_tuple_union_policy<float>>;
29
31
 
30
32
  TEST_CASE("tuple jaccard: empty", "[tuple_sketch]") {
31
33
  auto sk_a = update_tuple_sketch<float>::builder().build();
@@ -0,0 +1,47 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <tuple_sketch.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ // assume the binary sketches for this test have been generated by datasketches-java code
27
+ // in the subdirectory called "java" in the root directory of this project
28
+ static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
29
+
30
+ TEST_CASE("tuple sketch int", "[serde_compat]") {
31
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
32
+ for (const unsigned n: n_arr) {
33
+ std::ifstream is;
34
+ is.exceptions(std::ios::failbit | std::ios::badbit);
35
+ is.open(testBinaryInputPath + "tuple_int_n" + std::to_string(n) + "_java.sk", std::ios::binary);
36
+ const auto sketch = compact_tuple_sketch<int>::deserialize(is);
37
+ REQUIRE(sketch.is_empty() == (n == 0));
38
+ REQUIRE(sketch.is_estimation_mode() == (n > 1000));
39
+ REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
40
+ for (const auto& entry: sketch) {
41
+ REQUIRE(entry.first < sketch.get_theta64());
42
+ REQUIRE(entry.second < static_cast<int>(n));
43
+ }
44
+ }
45
+ }
46
+
47
+ } /* namespace datasketches */
@@ -0,0 +1,38 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <fstream>
22
+ #include <tuple_sketch.hpp>
23
+
24
+ namespace datasketches {
25
+
26
+ TEST_CASE("tuple sketch int generate", "[serialize_for_java]") {
27
+ const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
28
+ for (const unsigned n: n_arr) {
29
+ auto sketch = update_tuple_sketch<int>::builder().build();
30
+ for (unsigned i = 0; i < n; ++i) sketch.update(i, i);
31
+ REQUIRE(sketch.is_empty() == (n == 0));
32
+ REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
33
+ std::ofstream os("tuple_int_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
34
+ sketch.compact().serialize(os);
35
+ }
36
+ }
37
+
38
+ } /* namespace datasketches */
@@ -86,7 +86,7 @@ TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
86
86
  REQUIRE(update_sketch.compact(false).is_ordered());
87
87
  }
88
88
 
89
- TEST_CASE("tuple sketch: single item", "[theta_sketch]") {
89
+ TEST_CASE("tuple sketch: single item", "[tuple_sketch]") {
90
90
  auto update_sketch = update_tuple_sketch<float>::builder().build();
91
91
  update_sketch.update(1, 1.0f);
92
92
  REQUIRE_FALSE(update_sketch.is_empty());
@@ -1 +1 @@
1
- 4.1.0
1
+ 5.0.0