datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -32,120 +32,120 @@ namespace datasketches {
32
32
  using count_min_sketch_test_alloc = count_min_sketch<uint64_t, test_allocator<uint64_t>>;
33
33
  using alloc = test_allocator<uint64_t>;
34
34
 
35
- TEST_CASE("CountMin sketch test allocator: serialize-deserialize empty", "[cm_sketch_alloc]"){
35
+ TEST_CASE("CountMin sketch test allocator: serialize-deserialize empty", "[cm_sketch_alloc]") {
36
36
  test_allocator_total_bytes = 0;
37
37
  test_allocator_net_allocations = 0;
38
38
  {
39
- uint8_t n_hashes = 1 ;
40
- uint32_t n_buckets = 5 ;
39
+ uint8_t n_hashes = 1;
40
+ uint32_t n_buckets = 5;
41
41
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
42
- count_min_sketch_test_alloc c(n_hashes, n_buckets, DEFAULT_SEED, alloc(0)) ;
42
+ count_min_sketch_test_alloc c(n_hashes, n_buckets, DEFAULT_SEED, alloc(0));
43
43
  c.serialize(s);
44
44
  count_min_sketch_test_alloc d = count_min_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0)) ;
45
- REQUIRE(c.get_num_hashes() == d.get_num_hashes()) ;
46
- REQUIRE(c.get_num_buckets() == d.get_num_buckets()) ;
47
- REQUIRE(c.get_seed() == d.get_seed()) ;
45
+ REQUIRE(c.get_num_hashes() == d.get_num_hashes());
46
+ REQUIRE(c.get_num_buckets() == d.get_num_buckets());
47
+ REQUIRE(c.get_seed() == d.get_seed());
48
48
  uint64_t zero = 0;
49
- REQUIRE(c.get_estimate(zero) == d.get_estimate(zero)) ;
50
- REQUIRE(c.get_total_weight() == d.get_total_weight()) ;
49
+ REQUIRE(c.get_estimate(zero) == d.get_estimate(zero));
50
+ REQUIRE(c.get_total_weight() == d.get_total_weight());
51
51
 
52
52
  // Check that all entries are equal and 0
53
- for(auto di: d){
54
- REQUIRE(di == 0) ;
53
+ for (auto di: d) {
54
+ REQUIRE(di == 0);
55
55
  }
56
56
  }
57
57
  REQUIRE(test_allocator_total_bytes == 0);
58
58
  REQUIRE(test_allocator_net_allocations == 0);
59
59
  }
60
60
 
61
- TEST_CASE("CountMin sketch test allocator: serialize-deserialize non-empty", "[cm_sketch_alloc]"){
61
+ TEST_CASE("CountMin sketch test allocator: serialize-deserialize non-empty", "[cm_sketch_alloc]") {
62
62
  test_allocator_total_bytes = 0;
63
63
  test_allocator_net_allocations = 0;
64
64
  {
65
- uint8_t n_hashes = 3 ;
66
- uint32_t n_buckets = 1024 ;
65
+ uint8_t n_hashes = 3;
66
+ uint32_t n_buckets = 1024;
67
67
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
68
- count_min_sketch_test_alloc c(n_hashes, n_buckets, DEFAULT_SEED, alloc(0)) ;
69
- for(uint64_t i=0 ; i < 10; ++i) c.update(i,10*i*i) ;
68
+ count_min_sketch_test_alloc c(n_hashes, n_buckets, DEFAULT_SEED, alloc(0));
69
+ for (uint64_t i = 0; i < 10; ++i) c.update(i, 10 * i * i);
70
70
  c.serialize(s);
71
- count_min_sketch_test_alloc d = count_min_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0)) ;
72
- REQUIRE(c.get_num_hashes() == d.get_num_hashes()) ;
73
- REQUIRE(c.get_num_buckets() == d.get_num_buckets()) ;
74
- REQUIRE(c.get_seed() == d.get_seed()) ;
75
- REQUIRE(c.get_total_weight() == d.get_total_weight()) ;
76
- for(uint64_t i=0 ; i < 10; ++i){
77
- REQUIRE(c.get_estimate(i) == d.get_estimate(i)) ;
71
+ count_min_sketch_test_alloc d = count_min_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
72
+ REQUIRE(c.get_num_hashes() == d.get_num_hashes());
73
+ REQUIRE(c.get_num_buckets() == d.get_num_buckets());
74
+ REQUIRE(c.get_seed() == d.get_seed());
75
+ REQUIRE(c.get_total_weight() == d.get_total_weight());
76
+ for (uint64_t i = 0; i < 10; ++i) {
77
+ REQUIRE(c.get_estimate(i) == d.get_estimate(i));
78
78
  }
79
79
 
80
- auto c_it = c.begin() ;
81
- auto d_it = d.begin() ;
82
- while(c_it != c.end()){
83
- REQUIRE(*c_it == *d_it) ;
84
- ++c_it ;
85
- ++d_it ;
80
+ auto c_it = c.begin();
81
+ auto d_it = d.begin();
82
+ while (c_it != c.end()) {
83
+ REQUIRE(*c_it == *d_it);
84
+ ++c_it;
85
+ ++d_it;
86
86
  }
87
87
  }
88
88
  REQUIRE(test_allocator_total_bytes == 0);
89
89
  REQUIRE(test_allocator_net_allocations == 0);
90
90
  }
91
91
 
92
- TEST_CASE("CountMin sketch test allocator: bytes serialize-deserialize empty", "[cm_sketch_alloc]"){
92
+ TEST_CASE("CountMin sketch test allocator: bytes serialize-deserialize empty", "[cm_sketch_alloc]") {
93
93
  test_allocator_total_bytes = 0;
94
94
  test_allocator_net_allocations = 0;
95
95
  {
96
- uint8_t n_hashes = 3 ;
97
- uint32_t n_buckets = 32 ;
98
- count_min_sketch_test_alloc c(n_hashes, n_buckets, DEFAULT_SEED, alloc(0)) ;
99
- auto bytes = c.serialize() ;
96
+ uint8_t n_hashes = 3;
97
+ uint32_t n_buckets = 32;
98
+ count_min_sketch_test_alloc c(n_hashes, n_buckets, DEFAULT_SEED, alloc(0));
99
+ auto bytes = c.serialize();
100
100
 
101
101
  REQUIRE_THROWS_AS(count_min_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED-1, alloc(0)), std::invalid_argument);
102
- auto d = count_min_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, alloc(0)) ;
103
- REQUIRE(c.get_num_hashes() == d.get_num_hashes()) ;
104
- REQUIRE(c.get_num_buckets() == d.get_num_buckets()) ;
105
- REQUIRE(c.get_seed() == d.get_seed()) ;
102
+ auto d = count_min_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, alloc(0));
103
+ REQUIRE(c.get_num_hashes() == d.get_num_hashes());
104
+ REQUIRE(c.get_num_buckets() == d.get_num_buckets());
105
+ REQUIRE(c.get_seed() == d.get_seed());
106
106
  uint64_t zero = 0;
107
- REQUIRE(c.get_estimate(zero) == d.get_estimate(zero)) ;
108
- REQUIRE(c.get_total_weight() == d.get_total_weight()) ;
107
+ REQUIRE(c.get_estimate(zero) == d.get_estimate(zero));
108
+ REQUIRE(c.get_total_weight() == d.get_total_weight());
109
109
 
110
110
  // Check that all entries are equal and 0
111
- for(auto di: d){
112
- REQUIRE(di == 0) ;
111
+ for (auto di: d) {
112
+ REQUIRE(di == 0);
113
113
  }
114
114
  }
115
115
  REQUIRE(test_allocator_total_bytes == 0);
116
116
  REQUIRE(test_allocator_net_allocations == 0);
117
117
  }
118
118
 
119
- TEST_CASE("CountMin sketch test allocator: bytes serialize-deserialize non-empty", "[cm_sketch_alloc]"){
119
+ TEST_CASE("CountMin sketch test allocator: bytes serialize-deserialize non-empty", "[cm_sketch_alloc]") {
120
120
  test_allocator_total_bytes = 0;
121
121
  test_allocator_net_allocations = 0;
122
122
  {
123
- uint8_t n_hashes = 5 ;
124
- uint32_t n_buckets = 64 ;
125
- count_min_sketch_test_alloc c(n_hashes, n_buckets, DEFAULT_SEED, alloc(0)) ;
126
- for(uint64_t i=0 ; i < 10; ++i) c.update(i,10*i*i) ;
123
+ uint8_t n_hashes = 5;
124
+ uint32_t n_buckets = 64;
125
+ count_min_sketch_test_alloc c(n_hashes, n_buckets, DEFAULT_SEED, alloc(0));
126
+ for (uint64_t i = 0; i < 10; ++i) c.update(i, 10 * i * i);
127
127
 
128
- auto bytes = c.serialize() ;
128
+ auto bytes = c.serialize();
129
129
  REQUIRE_THROWS_AS(count_min_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED-1, alloc(0)), std::invalid_argument);
130
- auto d = count_min_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, alloc(0)) ;
130
+ auto d = count_min_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, alloc(0));
131
131
 
132
- REQUIRE(c.get_num_hashes() == d.get_num_hashes()) ;
133
- REQUIRE(c.get_num_buckets() == d.get_num_buckets()) ;
134
- REQUIRE(c.get_seed() == d.get_seed()) ;
135
- REQUIRE(c.get_total_weight() == d.get_total_weight()) ;
132
+ REQUIRE(c.get_num_hashes() == d.get_num_hashes());
133
+ REQUIRE(c.get_num_buckets() == d.get_num_buckets());
134
+ REQUIRE(c.get_seed() == d.get_seed());
135
+ REQUIRE(c.get_total_weight() == d.get_total_weight());
136
136
 
137
137
  // Check that all entries are equal
138
- auto c_it = c.begin() ;
139
- auto d_it = d.begin() ;
140
- while(c_it != c.end()){
141
- REQUIRE(*c_it == *d_it) ;
142
- ++c_it ;
143
- ++d_it ;
138
+ auto c_it = c.begin();
139
+ auto d_it = d.begin();
140
+ while (c_it != c.end()) {
141
+ REQUIRE(*c_it == *d_it);
142
+ ++c_it;
143
+ ++d_it;
144
144
  }
145
145
 
146
146
  // Check that the estimates agree
147
- for(uint64_t i=0 ; i < 10; ++i){
148
- REQUIRE(c.get_estimate(i) == d.get_estimate(i)) ;
147
+ for (uint64_t i = 0; i < 10; ++i) {
148
+ REQUIRE(c.get_estimate(i) == d.get_estimate(i));
149
149
  }
150
150
  }
151
151
  REQUIRE(test_allocator_total_bytes == 0);
@@ -26,281 +26,278 @@
26
26
  #include "count_min.hpp"
27
27
  #include "common_defs.hpp"
28
28
 
29
- namespace datasketches{
29
+ namespace datasketches {
30
30
 
31
31
  TEST_CASE("CM init - throws") {
32
32
  REQUIRE_THROWS_AS(count_min_sketch<uint64_t>(5, 1), std::invalid_argument);
33
33
  REQUIRE_THROWS_AS(count_min_sketch<uint64_t>(4, 268435456), std::invalid_argument);
34
34
  }
35
35
 
36
- TEST_CASE("CM init"){
37
- uint8_t n_hashes = 3 ;
38
- uint32_t n_buckets = 5 ;
39
- uint64_t seed = 1234567 ;
40
- count_min_sketch<uint64_t> c(n_hashes, n_buckets, seed) ;
41
- REQUIRE(c.get_num_hashes() == n_hashes) ;
42
- REQUIRE(c.get_num_buckets() == n_buckets) ;
43
- REQUIRE(c.get_seed() == seed) ;
44
- REQUIRE(c.is_empty()) ;
45
-
46
- for(auto x: c){
47
- REQUIRE(x == 0) ;
36
+ TEST_CASE("CM init") {
37
+ uint8_t n_hashes = 3;
38
+ uint32_t n_buckets = 5;
39
+ uint64_t seed = 1234567;
40
+ count_min_sketch<uint64_t> c(n_hashes, n_buckets, seed);
41
+ REQUIRE(c.get_num_hashes() == n_hashes);
42
+ REQUIRE(c.get_num_buckets() == n_buckets);
43
+ REQUIRE(c.get_seed() == seed);
44
+ REQUIRE(c.is_empty());
45
+
46
+ for (auto x: c) {
47
+ REQUIRE(x == 0);
48
48
  }
49
49
 
50
50
  // Check the default seed is appropriately set.
51
- count_min_sketch<uint64_t> c1(n_hashes, n_buckets) ;
52
- REQUIRE(c1.get_seed() == DEFAULT_SEED) ;
51
+ count_min_sketch<uint64_t> c1(n_hashes, n_buckets);
52
+ REQUIRE(c1.get_seed() == DEFAULT_SEED);
53
53
  }
54
54
 
55
55
  TEST_CASE("CM parameter suggestions", "[error parameters]") {
56
56
 
57
57
  // Bucket suggestions
58
- REQUIRE_THROWS(count_min_sketch<uint64_t>::suggest_num_buckets(-1.0), "Confidence must be between 0 and 1.0 (inclusive)." ) ;
59
- REQUIRE(count_min_sketch<uint64_t>::suggest_num_buckets(0.2) == 14) ;
60
- REQUIRE(count_min_sketch<uint64_t>::suggest_num_buckets(0.1) == 28) ;
61
- REQUIRE(count_min_sketch<uint64_t>::suggest_num_buckets(0.05) == 55) ;
62
- REQUIRE(count_min_sketch<uint64_t>::suggest_num_buckets(0.01) == 272) ;
58
+ REQUIRE_THROWS(count_min_sketch<uint64_t>::suggest_num_buckets(-1.0), "Confidence must be between 0 and 1.0 (inclusive)." );
59
+ REQUIRE(count_min_sketch<uint64_t>::suggest_num_buckets(0.2) == 14);
60
+ REQUIRE(count_min_sketch<uint64_t>::suggest_num_buckets(0.1) == 28);
61
+ REQUIRE(count_min_sketch<uint64_t>::suggest_num_buckets(0.05) == 55);
62
+ REQUIRE(count_min_sketch<uint64_t>::suggest_num_buckets(0.01) == 272);
63
63
 
64
64
  // Check that the sketch get_epsilon acts inversely to suggest_num_buckets
65
- uint8_t n_hashes = 3 ;
66
- REQUIRE(count_min_sketch<uint64_t>(n_hashes, 14).get_relative_error() <= 0.2) ;
67
- REQUIRE(count_min_sketch<uint64_t>(n_hashes, 28).get_relative_error() <= 0.1) ;
68
- REQUIRE(count_min_sketch<uint64_t>(n_hashes, 55).get_relative_error() <= 0.05) ;
69
- REQUIRE(count_min_sketch<uint64_t>(n_hashes, 272).get_relative_error() <= 0.01) ;
65
+ uint8_t n_hashes = 3;
66
+ REQUIRE(count_min_sketch<uint64_t>(n_hashes, 14).get_relative_error() <= 0.2);
67
+ REQUIRE(count_min_sketch<uint64_t>(n_hashes, 28).get_relative_error() <= 0.1);
68
+ REQUIRE(count_min_sketch<uint64_t>(n_hashes, 55).get_relative_error() <= 0.05);
69
+ REQUIRE(count_min_sketch<uint64_t>(n_hashes, 272).get_relative_error() <= 0.01);
70
70
 
71
71
  // Hash suggestions
72
- REQUIRE_THROWS(count_min_sketch<uint64_t>::suggest_num_hashes(10.0), "Confidence must be between 0 and 1.0 (inclusive)." ) ;
73
- REQUIRE_THROWS(count_min_sketch<uint64_t>::suggest_num_hashes(-1.0), "Confidence must be between 0 and 1.0 (inclusive)." ) ;
74
- REQUIRE(count_min_sketch<uint64_t>::suggest_num_hashes(0.682689492) == 2) ; // 1 STDDEV
75
- REQUIRE(count_min_sketch<uint64_t>::suggest_num_hashes(0.954499736) == 4) ; // 2 STDDEV
76
- REQUIRE(count_min_sketch<uint64_t>::suggest_num_hashes(0.997300204) == 6) ; // 3 STDDEV
72
+ REQUIRE_THROWS(count_min_sketch<uint64_t>::suggest_num_hashes(10.0), "Confidence must be between 0 and 1.0 (inclusive)." );
73
+ REQUIRE_THROWS(count_min_sketch<uint64_t>::suggest_num_hashes(-1.0), "Confidence must be between 0 and 1.0 (inclusive)." );
74
+ REQUIRE(count_min_sketch<uint64_t>::suggest_num_hashes(0.682689492) == 2); // 1 STDDEV
75
+ REQUIRE(count_min_sketch<uint64_t>::suggest_num_hashes(0.954499736) == 4); // 2 STDDEV
76
+ REQUIRE(count_min_sketch<uint64_t>::suggest_num_hashes(0.997300204) == 6); // 3 STDDEV
77
77
  }
78
78
 
79
- TEST_CASE("CM one update: uint64_t"){
80
- uint8_t n_hashes = 3 ;
81
- uint32_t n_buckets = 5 ;
82
- uint64_t seed = 9223372036854775807 ; //1234567 ;
83
- uint64_t inserted_weight = 0 ;
84
- count_min_sketch<uint64_t> c(n_hashes, n_buckets, seed) ;
85
- std::string x = "x" ;
86
-
87
- REQUIRE(c.is_empty()) ;
88
- REQUIRE(c.get_estimate("x") == 0) ; // No items in sketch so estimates should be zero
89
- c.update(x) ;
90
- REQUIRE(!c.is_empty()) ;
91
- REQUIRE(c.get_estimate(x) == 1) ;
92
- inserted_weight += 1 ;
93
-
94
- uint64_t w = 9 ;
95
- inserted_weight += w ;
96
- c.update(x, w) ;
97
- REQUIRE(c.get_estimate(x) == inserted_weight) ;
79
+ TEST_CASE("CM one update: uint64_t") {
80
+ uint8_t n_hashes = 3;
81
+ uint32_t n_buckets = 5;
82
+ uint64_t seed = 9223372036854775807; //1234567;
83
+ uint64_t inserted_weight = 0;
84
+ count_min_sketch<uint64_t> c(n_hashes, n_buckets, seed);
85
+ std::string x = "x";
86
+
87
+ REQUIRE(c.is_empty());
88
+ REQUIRE(c.get_estimate("x") == 0); // No items in sketch so estimates should be zero
89
+ c.update(x);
90
+ REQUIRE(!c.is_empty());
91
+ REQUIRE(c.get_estimate(x) == 1);
92
+ inserted_weight += 1;
93
+
94
+ uint64_t w = 9;
95
+ inserted_weight += w;
96
+ c.update(x, w);
97
+ REQUIRE(c.get_estimate(x) == inserted_weight);
98
98
 
99
99
  // Doubles are converted to uint64_t
100
- double w1 = 10.0 ;
101
- inserted_weight += w1 ;
102
- c.update(x, w1) ;
103
- REQUIRE(c.get_estimate(x) == inserted_weight) ;
104
- REQUIRE(c.get_total_weight() == inserted_weight) ;
105
- REQUIRE(c.get_estimate(x) <= c.get_upper_bound(x)) ;
106
- REQUIRE(c.get_estimate(x) >= c.get_lower_bound(x)) ;
100
+ double w1 = 10.0;
101
+ inserted_weight += static_cast<uint64_t>(w1);
102
+ c.update(x, static_cast<uint64_t>(w1));
103
+ REQUIRE(c.get_estimate(x) == inserted_weight);
104
+ REQUIRE(c.get_total_weight() == inserted_weight);
105
+ REQUIRE(c.get_estimate(x) <= c.get_upper_bound(x));
106
+ REQUIRE(c.get_estimate(x) >= c.get_lower_bound(x));
107
107
  }
108
108
 
109
- TEST_CASE("CM frequency cancellation"){
110
- count_min_sketch<int64_t> c(1, 5) ;
111
- c.update("x") ;
112
- c.update("y", -1) ;
113
- REQUIRE(c.get_total_weight() == 2) ;
114
- REQUIRE(c.get_estimate("x") == 1) ;
115
- REQUIRE(c.get_estimate("y") == -1) ;
109
+ TEST_CASE("CM frequency cancellation") {
110
+ count_min_sketch<int64_t> c(1, 5);
111
+ c.update("x");
112
+ c.update("y", -1);
113
+ REQUIRE(c.get_total_weight() == 2);
114
+ REQUIRE(c.get_estimate("x") == 1);
115
+ REQUIRE(c.get_estimate("y") == -1);
116
116
  }
117
117
 
118
-
119
- TEST_CASE("CM frequency estimates"){
120
- int number_of_items = 10 ;
121
- std::vector<uint64_t> data(number_of_items) ;
122
- std::vector<uint64_t> frequencies(number_of_items) ;
118
+ TEST_CASE("CM frequency estimates") {
119
+ int number_of_items = 10;
120
+ std::vector<uint64_t> data(number_of_items);
121
+ std::vector<uint64_t> frequencies(number_of_items);
123
122
 
124
123
  // Populate data vector
125
- for(int i=0; i < number_of_items; i++){
124
+ for (int i = 0; i < number_of_items; ++i) {
126
125
  data[i] = i;
127
- frequencies[i] = 1 << (number_of_items - i) ;
126
+ frequencies[i] = 1ULL << (number_of_items - i);
128
127
  }
129
128
 
130
- double relative_error = 0.1 ;
131
- double confidence = 0.99 ;
132
- uint8_t n_buckets = count_min_sketch<uint64_t>::suggest_num_buckets(relative_error) ;
133
- uint32_t n_hashes = count_min_sketch<uint64_t>::suggest_num_hashes(confidence) ;
129
+ double relative_error = 0.1;
130
+ double confidence = 0.99;
131
+ uint32_t n_buckets = count_min_sketch<uint64_t>::suggest_num_buckets(relative_error);
132
+ uint8_t n_hashes = count_min_sketch<uint64_t>::suggest_num_hashes(confidence);
134
133
 
135
- count_min_sketch<uint64_t> c(n_hashes, n_buckets) ;
136
- for(int i=0 ; i < number_of_items ; i++) {
137
- uint64_t value = data[i] ;
138
- uint64_t freq = frequencies[i] ;
139
- c.update(value, freq) ;
134
+ count_min_sketch<uint64_t> c(n_hashes, n_buckets);
135
+ for (int i = 0; i < number_of_items; ++i) {
136
+ uint64_t value = data[i];
137
+ uint64_t freq = frequencies[i];
138
+ c.update(value, freq);
140
139
  }
141
140
 
142
- for(const auto i: data){
143
- uint64_t est = c.get_estimate(i) ;
144
- uint64_t upp = c.get_upper_bound(i) ;
145
- uint64_t low = c.get_lower_bound(i) ;
146
- REQUIRE(est <= upp) ;
147
- REQUIRE(est >= low) ;
141
+ for (const auto i: data) {
142
+ uint64_t est = c.get_estimate(i);
143
+ uint64_t upp = c.get_upper_bound(i);
144
+ uint64_t low = c.get_lower_bound(i);
145
+ REQUIRE(est <= upp);
146
+ REQUIRE(est >= low);
148
147
  }
149
148
  }
150
149
 
151
- TEST_CASE("CM merge - reject", "[reject cases]"){
152
- double relative_error = 0.25 ;
153
- double confidence = 0.9 ;
154
- uint32_t n_buckets = count_min_sketch<uint64_t>::suggest_num_buckets(relative_error) ;
155
- uint8_t n_hashes = count_min_sketch<uint64_t>::suggest_num_hashes(confidence) ;
156
- count_min_sketch<uint64_t> s(n_hashes, n_buckets, 9082435234709287) ;
157
-
150
+ TEST_CASE("CM merge - reject", "[reject cases]") {
151
+ double relative_error = 0.25;
152
+ double confidence = 0.9;
153
+ uint32_t n_buckets = count_min_sketch<uint64_t>::suggest_num_buckets(relative_error);
154
+ uint8_t n_hashes = count_min_sketch<uint64_t>::suggest_num_hashes(confidence);
155
+ count_min_sketch<uint64_t> s(n_hashes, n_buckets, 9082435234709287);
158
156
 
159
157
  // Generate sketches that we cannot merge into ie they disagree on at least one of the config entries
160
- count_min_sketch<uint64_t> s1(n_hashes+1, n_buckets) ; // incorrect number of hashes
161
- count_min_sketch<uint64_t> s2(n_hashes, n_buckets+1) ;// incorrect number of buckets
162
- count_min_sketch<uint64_t> s3(n_hashes, n_buckets, 1) ;// incorrect seed
158
+ count_min_sketch<uint64_t> s1(n_hashes+1, n_buckets); // incorrect number of hashes
159
+ count_min_sketch<uint64_t> s2(n_hashes, n_buckets + 1); // incorrect number of buckets
160
+ count_min_sketch<uint64_t> s3(n_hashes, n_buckets, 1); // incorrect seed
163
161
  std::vector<count_min_sketch<uint64_t>> sketches = {s1, s2, s3};
164
162
 
165
163
  // Fail cases
166
- REQUIRE_THROWS(s.merge(s), "Cannot merge a sketch with itself." ) ;
167
- for(count_min_sketch<uint64_t> sk : sketches){
168
- REQUIRE_THROWS(s.merge(sk), "Incompatible sketch config." ) ;
164
+ REQUIRE_THROWS(s.merge(s), "Cannot merge a sketch with itself." );
165
+ for (count_min_sketch<uint64_t> sk : sketches) {
166
+ REQUIRE_THROWS(s.merge(sk), "Incompatible sketch config." );
169
167
  }
170
168
  }
171
169
 
172
- TEST_CASE("CM merge - pass", "[acceptable cases]"){
173
- double relative_error = 0.25 ;
174
- double confidence = 0.9 ;
175
- uint32_t n_buckets = count_min_sketch<uint64_t>::suggest_num_buckets(relative_error) ;
176
- uint8_t n_hashes = count_min_sketch<uint64_t>::suggest_num_hashes(confidence) ;
177
- count_min_sketch<uint64_t> s(n_hashes, n_buckets) ;
178
- uint8_t s_hashes = s.get_num_hashes() ;
179
- uint32_t s_buckets = s.get_num_buckets() ;
180
- count_min_sketch<uint64_t> t(s_hashes, s_buckets) ;
170
+ TEST_CASE("CM merge - pass", "[acceptable cases]") {
171
+ double relative_error = 0.25;
172
+ double confidence = 0.9;
173
+ uint32_t n_buckets = count_min_sketch<uint64_t>::suggest_num_buckets(relative_error);
174
+ uint8_t n_hashes = count_min_sketch<uint64_t>::suggest_num_hashes(confidence);
175
+ count_min_sketch<uint64_t> s(n_hashes, n_buckets);
176
+ uint8_t s_hashes = s.get_num_hashes();
177
+ uint32_t s_buckets = s.get_num_buckets();
178
+ count_min_sketch<uint64_t> t(s_hashes, s_buckets);
181
179
 
182
180
  // Merge in an all-zeros sketch t. Should not change the total weight.
183
- s.merge(t) ;
184
- REQUIRE(s.get_total_weight() == 0 ) ;
181
+ s.merge(t);
182
+ REQUIRE(s.get_total_weight() == 0 );
185
183
 
186
184
  std::vector<uint64_t> data = {2,3,5,7};
187
- for(auto d: data){
188
- s.update(d) ;
189
- t.update(d) ;
185
+ for (auto d: data) {
186
+ s.update(d);
187
+ t.update(d);
190
188
  }
191
189
  s.merge(t);
192
190
 
193
- REQUIRE(s.get_total_weight() == 2*t.get_total_weight());
191
+ REQUIRE(s.get_total_weight() == 2 * t.get_total_weight());
194
192
 
195
193
  // Estimator checks.
196
- for (auto x : data) {
197
- REQUIRE(s.get_estimate(x) <= s.get_upper_bound(x)) ;
194
+ for (auto x: data) {
195
+ REQUIRE(s.get_estimate(x) <= s.get_upper_bound(x));
198
196
  REQUIRE(s.get_estimate(x) <= 2); // True frequency x == 2 for all x.
199
197
  }
200
198
  }
201
199
 
202
- TEST_CASE("CountMin sketch: serialize-deserialize empty", "[cm_sketch]"){
203
- uint8_t n_hashes = 1 ;
204
- uint32_t n_buckets = 5 ;
200
+ TEST_CASE("CountMin sketch: serialize-deserialize empty", "[cm_sketch]") {
201
+ uint8_t n_hashes = 1;
202
+ uint32_t n_buckets = 5;
205
203
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
206
- count_min_sketch<uint64_t> c(n_hashes, n_buckets) ;
204
+ count_min_sketch<uint64_t> c(n_hashes, n_buckets);
207
205
  c.serialize(s);
208
- count_min_sketch<uint64_t> d = count_min_sketch<uint64_t>::deserialize(s, DEFAULT_SEED) ;
209
- REQUIRE(c.get_num_hashes() == d.get_num_hashes()) ;
210
- REQUIRE(c.get_num_buckets() == d.get_num_buckets()) ;
211
- REQUIRE(c.get_seed() == d.get_seed()) ;
206
+ count_min_sketch<uint64_t> d = count_min_sketch<uint64_t>::deserialize(s, DEFAULT_SEED);
207
+ REQUIRE(c.get_num_hashes() == d.get_num_hashes());
208
+ REQUIRE(c.get_num_buckets() == d.get_num_buckets());
209
+ REQUIRE(c.get_seed() == d.get_seed());
212
210
  uint64_t zero = 0;
213
- REQUIRE(c.get_estimate(zero) == d.get_estimate(zero)) ;
214
- REQUIRE(c.get_total_weight() == d.get_total_weight()) ;
211
+ REQUIRE(c.get_estimate(zero) == d.get_estimate(zero));
212
+ REQUIRE(c.get_total_weight() == d.get_total_weight());
215
213
 
216
214
  // Check that all entries are equal and 0
217
- for(auto di: d){
218
- REQUIRE(di == 0) ;
215
+ for (auto di: d) {
216
+ REQUIRE(di == 0);
219
217
  }
220
218
  std::ofstream os("count_min-empty.bin");
221
219
  c.serialize(os);
222
220
  }
223
221
 
224
- TEST_CASE("CountMin sketch: serialize-deserialize non-empty", "[cm_sketch]"){
225
- uint8_t n_hashes = 3 ;
226
- uint32_t n_buckets = 1024 ;
222
+ TEST_CASE("CountMin sketch: serialize-deserialize non-empty", "[cm_sketch]") {
223
+ uint8_t n_hashes = 3;
224
+ uint32_t n_buckets = 1024;
227
225
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
228
- count_min_sketch<uint64_t> c(n_hashes, n_buckets) ;
229
- for(uint64_t i=0 ; i < 10; ++i) c.update(i,10*i*i) ;
226
+ count_min_sketch<uint64_t> c(n_hashes, n_buckets);
227
+ for (uint64_t i = 0; i < 10; ++i) c.update(i, 10 * i * i);
230
228
  c.serialize(s);
231
- count_min_sketch<uint64_t> d = count_min_sketch<uint64_t>::deserialize(s, DEFAULT_SEED) ;
232
- REQUIRE(c.get_num_hashes() == d.get_num_hashes()) ;
233
- REQUIRE(c.get_num_buckets() == d.get_num_buckets()) ;
234
- REQUIRE(c.get_seed() == d.get_seed()) ;
235
- REQUIRE(c.get_total_weight() == d.get_total_weight()) ;
236
- for(uint64_t i=0 ; i < 10; ++i){
237
- REQUIRE(c.get_estimate(i) == d.get_estimate(i)) ;
229
+ count_min_sketch<uint64_t> d = count_min_sketch<uint64_t>::deserialize(s, DEFAULT_SEED);
230
+ REQUIRE(c.get_num_hashes() == d.get_num_hashes());
231
+ REQUIRE(c.get_num_buckets() == d.get_num_buckets());
232
+ REQUIRE(c.get_seed() == d.get_seed());
233
+ REQUIRE(c.get_total_weight() == d.get_total_weight());
234
+ for (uint64_t i = 0; i < 10; ++i) {
235
+ REQUIRE(c.get_estimate(i) == d.get_estimate(i));
238
236
  }
239
237
 
240
- auto c_it = c.begin() ;
241
- auto d_it = d.begin() ;
242
- while(c_it != c.end()){
243
- REQUIRE(*c_it == *d_it) ;
244
- ++c_it ;
245
- ++d_it ;
238
+ auto c_it = c.begin();
239
+ auto d_it = d.begin();
240
+ while (c_it != c.end()) {
241
+ REQUIRE(*c_it == *d_it);
242
+ ++c_it;
243
+ ++d_it;
246
244
  }
247
245
 
248
246
  std::ofstream os("count_min-non-empty.bin");
249
247
  c.serialize(os);
250
248
  }
251
249
 
252
- TEST_CASE("CountMin sketch: bytes serialize-deserialize empty", "[cm_sketch]"){
253
- uint8_t n_hashes = 3 ;
254
- uint32_t n_buckets = 32 ;
255
- count_min_sketch<uint64_t> c(n_hashes, n_buckets) ;
256
- auto bytes = c.serialize() ;
250
+ TEST_CASE("CountMin sketch: bytes serialize-deserialize empty", "[cm_sketch]") {
251
+ uint8_t n_hashes = 3;
252
+ uint32_t n_buckets = 32;
253
+ count_min_sketch<uint64_t> c(n_hashes, n_buckets);
254
+ auto bytes = c.serialize();
257
255
 
258
256
  REQUIRE_THROWS_AS(count_min_sketch<uint64_t>::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED-1), std::invalid_argument);
259
- auto d = count_min_sketch<uint64_t>::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED) ;
260
- REQUIRE(c.get_num_hashes() == d.get_num_hashes()) ;
261
- REQUIRE(c.get_num_buckets() == d.get_num_buckets()) ;
262
- REQUIRE(c.get_seed() == d.get_seed()) ;
257
+ auto d = count_min_sketch<uint64_t>::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED);
258
+ REQUIRE(c.get_num_hashes() == d.get_num_hashes());
259
+ REQUIRE(c.get_num_buckets() == d.get_num_buckets());
260
+ REQUIRE(c.get_seed() == d.get_seed());
263
261
  uint64_t zero = 0;
264
- REQUIRE(c.get_estimate(zero) == d.get_estimate(zero)) ;
265
- REQUIRE(c.get_total_weight() == d.get_total_weight()) ;
262
+ REQUIRE(c.get_estimate(zero) == d.get_estimate(zero));
263
+ REQUIRE(c.get_total_weight() == d.get_total_weight());
266
264
 
267
265
  // Check that all entries are equal and 0
268
- for(auto di: d){
269
- REQUIRE(di == 0) ;
266
+ for (auto di: d) {
267
+ REQUIRE(di == 0);
270
268
  }
271
269
  }
272
270
 
273
271
 
274
- TEST_CASE("CountMin sketch: bytes serialize-deserialize non-empty", "[cm_sketch]"){
275
- uint8_t n_hashes = 5 ;
276
- uint32_t n_buckets = 64 ;
277
- count_min_sketch<uint64_t> c(n_hashes, n_buckets) ;
278
- for(uint64_t i=0 ; i < 10; ++i) c.update(i,10*i*i) ;
272
+ TEST_CASE("CountMin sketch: bytes serialize-deserialize non-empty", "[cm_sketch]") {
273
+ uint8_t n_hashes = 5;
274
+ uint32_t n_buckets = 64;
275
+ count_min_sketch<uint64_t> c(n_hashes, n_buckets);
276
+ for(uint64_t i=0; i < 10; ++i) c.update(i,10*i*i);
279
277
 
280
- auto bytes = c.serialize() ;
278
+ auto bytes = c.serialize();
281
279
  REQUIRE_THROWS_AS(count_min_sketch<uint64_t>::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED-1), std::invalid_argument);
282
- auto d = count_min_sketch<uint64_t>::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED) ;
280
+ auto d = count_min_sketch<uint64_t>::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED);
283
281
 
284
- REQUIRE(c.get_num_hashes() == d.get_num_hashes()) ;
285
- REQUIRE(c.get_num_buckets() == d.get_num_buckets()) ;
286
- REQUIRE(c.get_seed() == d.get_seed()) ;
287
- REQUIRE(c.get_total_weight() == d.get_total_weight()) ;
282
+ REQUIRE(c.get_num_hashes() == d.get_num_hashes());
283
+ REQUIRE(c.get_num_buckets() == d.get_num_buckets());
284
+ REQUIRE(c.get_seed() == d.get_seed());
285
+ REQUIRE(c.get_total_weight() == d.get_total_weight());
288
286
 
289
287
  // Check that all entries are equal
290
- auto c_it = c.begin() ;
291
- auto d_it = d.begin() ;
292
- while(c_it != c.end()){
293
- REQUIRE(*c_it == *d_it) ;
294
- ++c_it ;
295
- ++d_it ;
288
+ auto c_it = c.begin();
289
+ auto d_it = d.begin();
290
+ while (c_it != c.end()) {
291
+ REQUIRE(*c_it == *d_it);
292
+ ++c_it;
293
+ ++d_it;
296
294
  }
297
295
 
298
296
  // Check that the estimates agree
299
- for(uint64_t i=0 ; i < 10; ++i){
300
- REQUIRE(c.get_estimate(i) == d.get_estimate(i)) ;
297
+ for (uint64_t i = 0; i < 10; ++i) {
298
+ REQUIRE(c.get_estimate(i) == d.get_estimate(i));
301
299
  }
302
300
 
303
301
  }
304
302
 
305
303
  } /* namespace datasketches */
306
-