datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -38,78 +38,77 @@ _num_hashes(num_hashes),
38
38
  _num_buckets(num_buckets),
39
39
  _sketch_array((num_hashes*num_buckets < 1<<30) ? num_hashes*num_buckets : 0, 0, _allocator),
40
40
  _seed(seed),
41
- _total_weight(0){
42
- if(num_buckets < 3) throw std::invalid_argument("Using fewer than 3 buckets incurs relative error greater than 1.") ;
41
+ _total_weight(0) {
42
+ if (num_buckets < 3) throw std::invalid_argument("Using fewer than 3 buckets incurs relative error greater than 1.");
43
43
 
44
44
  // This check is to ensure later compatibility with a Java implementation whose maximum size can only
45
45
  // be 2^31-1. We check only against 2^30 for simplicity.
46
- if(num_buckets*num_hashes >= 1<<30) {
46
+ if (num_buckets * num_hashes >= 1 << 30) {
47
47
  throw std::invalid_argument("These parameters generate a sketch that exceeds 2^30 elements."
48
- "Try reducing either the number of buckets or the number of hash functions.") ;
48
+ "Try reducing either the number of buckets or the number of hash functions.");
49
49
  }
50
50
 
51
51
  std::default_random_engine rng(_seed);
52
52
  std::uniform_int_distribution<uint64_t> extra_hash_seeds(0, std::numeric_limits<uint64_t>::max());
53
- hash_seeds.reserve(num_hashes) ;
53
+ hash_seeds.reserve(num_hashes);
54
54
 
55
- for(uint64_t i=0 ; i < num_hashes ; ++i){
55
+ for (uint64_t i=0; i < num_hashes; ++i) {
56
56
  hash_seeds.push_back(extra_hash_seeds(rng) + _seed); // Adds the global seed to all hash functions.
57
57
  }
58
58
  }
59
59
 
60
60
  template<typename W, typename A>
61
- uint8_t count_min_sketch<W,A>::get_num_hashes() const{
62
- return _num_hashes ;
61
+ uint8_t count_min_sketch<W,A>::get_num_hashes() const {
62
+ return _num_hashes;
63
63
  }
64
64
 
65
65
  template<typename W, typename A>
66
- uint32_t count_min_sketch<W,A>::get_num_buckets() const{
67
- return _num_buckets ;
66
+ uint32_t count_min_sketch<W,A>::get_num_buckets() const {
67
+ return _num_buckets;
68
68
  }
69
69
 
70
70
  template<typename W, typename A>
71
71
  uint64_t count_min_sketch<W,A>::get_seed() const {
72
- return _seed ;
72
+ return _seed;
73
73
  }
74
74
 
75
75
  template<typename W, typename A>
76
- double count_min_sketch<W,A>::get_relative_error() const{
77
- return exp(1.0) / double(_num_buckets) ;
76
+ double count_min_sketch<W,A>::get_relative_error() const {
77
+ return exp(1.0) / double(_num_buckets);
78
78
  }
79
79
 
80
80
  template<typename W, typename A>
81
- W count_min_sketch<W,A>::get_total_weight() const{
82
- return _total_weight ;
81
+ W count_min_sketch<W,A>::get_total_weight() const {
82
+ return _total_weight;
83
83
  }
84
84
 
85
85
  template<typename W, typename A>
86
- uint32_t count_min_sketch<W,A>::suggest_num_buckets(double relative_error){
86
+ uint32_t count_min_sketch<W,A>::suggest_num_buckets(double relative_error) {
87
87
  /*
88
88
  * Function to help users select a number of buckets for a given error.
89
89
  * TODO: Change this when we use only power of 2 buckets.
90
- *
91
90
  */
92
- if(relative_error < 0.){
93
- throw std::invalid_argument( "Relative error must be at least 0." );
91
+ if (relative_error < 0.) {
92
+ throw std::invalid_argument("Relative error must be at least 0.");
94
93
  }
95
- return ceil(exp(1.0) / relative_error);
94
+ return static_cast<uint32_t>(ceil(exp(1.0) / relative_error));
96
95
  }
97
96
 
98
97
  template<typename W, typename A>
99
- uint8_t count_min_sketch<W,A>::suggest_num_hashes(double confidence){
98
+ uint8_t count_min_sketch<W,A>::suggest_num_hashes(double confidence) {
100
99
  /*
101
100
  * Function to help users select a number of hashes for a given confidence
102
101
  * e.g. confidence = 1 - failure probability
103
102
  * failure probability == delta in the literature.
104
103
  */
105
- if(confidence < 0. || confidence > 1.0){
106
- throw std::invalid_argument( "Confidence must be between 0 and 1.0 (inclusive)." );
104
+ if (confidence < 0. || confidence > 1.0) {
105
+ throw std::invalid_argument("Confidence must be between 0 and 1.0 (inclusive).");
107
106
  }
108
- return std::min<uint8_t>( ceil(log(1.0/(1.0 - confidence))), UINT8_MAX) ;
107
+ return std::min<uint8_t>(ceil(log(1.0 / (1.0 - confidence))), UINT8_MAX);
109
108
  }
110
109
 
111
110
  template<typename W, typename A>
112
- std::vector<uint64_t> count_min_sketch<W,A>::get_hashes(const void* item, size_t size) const{
111
+ std::vector<uint64_t> count_min_sketch<W,A>::get_hashes(const void* item, size_t size) const {
113
112
  /*
114
113
  * Returns the hash locations for the input item using the original hashing
115
114
  * scheme from [1].
@@ -124,20 +123,20 @@ std::vector<uint64_t> count_min_sketch<W,A>::get_hashes(const void* item, size_t
124
123
  * https://github.com/Claudenw/BloomFilter/wiki/Bloom-Filters----An-overview
125
124
  * https://www.eecs.harvard.edu/~michaelm/postscripts/tr-02-05.pdf
126
125
  */
127
- uint64_t bucket_index ;
128
- std::vector<uint64_t> sketch_update_locations; //(_num_hashes) ;
129
- sketch_update_locations.reserve(_num_hashes) ;
126
+ uint64_t bucket_index;
127
+ std::vector<uint64_t> sketch_update_locations;
128
+ sketch_update_locations.reserve(_num_hashes);
130
129
 
131
- uint64_t hash_seed_index = 0 ;
132
- for(const auto &it : hash_seeds){
130
+ uint64_t hash_seed_index = 0;
131
+ for (const auto &it: hash_seeds) {
133
132
  HashState hashes;
134
133
  MurmurHash3_x64_128(item, size, it, hashes); // ? BEWARE OVERFLOW.
135
- uint64_t hash = hashes.h1 ;
136
- bucket_index = hash % _num_buckets ;
137
- sketch_update_locations.push_back((hash_seed_index * _num_buckets) + bucket_index) ;
138
- hash_seed_index += 1 ;
134
+ uint64_t hash = hashes.h1;
135
+ bucket_index = hash % _num_buckets;
136
+ sketch_update_locations.push_back((hash_seed_index * _num_buckets) + bucket_index);
137
+ hash_seed_index += 1;
139
138
  }
140
- return sketch_update_locations ;
139
+ return sketch_update_locations;
141
140
  }
142
141
 
143
142
  template<typename W, typename A>
@@ -148,7 +147,7 @@ W count_min_sketch<W,A>::get_estimate(int64_t item) const {return get_estimate(&
148
147
 
149
148
  template<typename W, typename A>
150
149
  W count_min_sketch<W,A>::get_estimate(const std::string& item) const {
151
- if (item.empty()) return 0 ; // Empty strings are not inserted into the sketch.
150
+ if (item.empty()) return 0; // Empty strings are not inserted into the sketch.
152
151
  return get_estimate(item.c_str(), item.length());
153
152
  }
154
153
 
@@ -157,13 +156,12 @@ W count_min_sketch<W,A>::get_estimate(const void* item, size_t size) const {
157
156
  /*
158
157
  * Returns the estimated frequency of the item
159
158
  */
160
- std::vector<uint64_t> hash_locations = get_hashes(item, size) ;
161
- std::vector<W> estimates ;
162
- for (auto h: hash_locations){
163
- estimates.push_back(_sketch_array[h]) ;
159
+ std::vector<uint64_t> hash_locations = get_hashes(item, size);
160
+ std::vector<W> estimates;
161
+ for (const auto h: hash_locations) {
162
+ estimates.push_back(_sketch_array[h]);
164
163
  }
165
- W result = *std::min_element(estimates.begin(), estimates.end());
166
- return result ;
164
+ return *std::min_element(estimates.begin(), estimates.end());
167
165
  }
168
166
 
169
167
  template<typename W, typename A>
@@ -171,44 +169,27 @@ void count_min_sketch<W,A>::update(uint64_t item, W weight) {
171
169
  update(&item, sizeof(item), weight);
172
170
  }
173
171
 
174
- template<typename W, typename A>
175
- void count_min_sketch<W,A>::update(uint64_t item) {
176
- update(&item, sizeof(item), 1);
177
- }
178
-
179
172
  template<typename W, typename A>
180
173
  void count_min_sketch<W,A>::update(int64_t item, W weight) {
181
174
  update(&item, sizeof(item), weight);
182
175
  }
183
176
 
184
- template<typename W, typename A>
185
- void count_min_sketch<W,A>::update(int64_t item) {
186
- update(&item, sizeof(item), 1);
187
- }
188
-
189
177
  template<typename W, typename A>
190
178
  void count_min_sketch<W,A>::update(const std::string& item, W weight) {
191
179
  if (item.empty()) return;
192
180
  update(item.c_str(), item.length(), weight);
193
181
  }
194
182
 
195
- template<typename W, typename A>
196
- void count_min_sketch<W,A>::update(const std::string& item) {
197
- if (item.empty()) return;
198
- update(item.c_str(), item.length(), 1);
199
- }
200
-
201
183
  template<typename W, typename A>
202
184
  void count_min_sketch<W,A>::update(const void* item, size_t size, W weight) {
203
185
  /*
204
186
  * Gets the item's hash locations and then increments the sketch in those
205
187
  * locations by the weight.
206
188
  */
207
- W magnitude = (weight >= 0) ? weight : -weight ;
208
- _total_weight += magnitude ;
209
- std::vector<uint64_t> hash_locations = get_hashes(item, size) ;
210
- for (auto h: hash_locations){
211
- _sketch_array[h] += weight ;
189
+ _total_weight += weight >= 0 ? weight : -weight;
190
+ std::vector<uint64_t> hash_locations = get_hashes(item, size);
191
+ for (const auto h: hash_locations) {
192
+ _sketch_array[h] += weight;
212
193
  }
213
194
  }
214
195
 
@@ -220,13 +201,13 @@ W count_min_sketch<W,A>::get_upper_bound(int64_t item) const {return get_upper_b
220
201
 
221
202
  template<typename W, typename A>
222
203
  W count_min_sketch<W,A>::get_upper_bound(const std::string& item) const {
223
- if (item.empty()) return 0 ; // Empty strings are not inserted into the sketch.
204
+ if (item.empty()) return 0; // Empty strings are not inserted into the sketch.
224
205
  return get_upper_bound(item.c_str(), item.length());
225
206
  }
226
207
 
227
208
  template<typename W, typename A>
228
209
  W count_min_sketch<W,A>::get_upper_bound(const void* item, size_t size) const {
229
- return get_estimate(item, size) + get_relative_error()*get_total_weight() ;
210
+ return static_cast<W>(get_estimate(item, size) + get_relative_error() * get_total_weight());
230
211
  }
231
212
 
232
213
  template<typename W, typename A>
@@ -237,41 +218,41 @@ W count_min_sketch<W,A>::get_lower_bound(int64_t item) const {return get_lower_b
237
218
 
238
219
  template<typename W, typename A>
239
220
  W count_min_sketch<W,A>::get_lower_bound(const std::string& item) const {
240
- if (item.empty()) return 0 ; // Empty strings are not inserted into the sketch.
221
+ if (item.empty()) return 0; // Empty strings are not inserted into the sketch.
241
222
  return get_lower_bound(item.c_str(), item.length());
242
223
  }
243
224
 
244
225
  template<typename W, typename A>
245
226
  W count_min_sketch<W,A>::get_lower_bound(const void* item, size_t size) const {
246
- return get_estimate(item, size) ;
227
+ return get_estimate(item, size);
247
228
  }
248
229
 
249
230
  template<typename W, typename A>
250
- void count_min_sketch<W,A>::merge(const count_min_sketch &other_sketch){
231
+ void count_min_sketch<W,A>::merge(const count_min_sketch &other_sketch) {
251
232
  /*
252
233
  * Merges this sketch into other_sketch sketch by elementwise summing of buckets
253
234
  */
254
- if(this == &other_sketch){
235
+ if (this == &other_sketch) {
255
236
  throw std::invalid_argument( "Cannot merge a sketch with itself." );
256
237
  }
257
238
 
258
239
  bool acceptable_config =
259
240
  (get_num_hashes() == other_sketch.get_num_hashes()) &&
260
241
  (get_num_buckets() == other_sketch.get_num_buckets()) &&
261
- (get_seed() == other_sketch.get_seed()) ;
262
- if(!acceptable_config){
242
+ (get_seed() == other_sketch.get_seed());
243
+ if (!acceptable_config) {
263
244
  throw std::invalid_argument( "Incompatible sketch configuration." );
264
245
  }
265
246
 
266
247
  // Merge step - iterate over the other vector and add the weights to this sketch
267
- auto it = _sketch_array.begin() ; // This is a std::vector iterator.
268
- auto other_it = other_sketch.begin() ; //This is a const iterator over the other sketch.
269
- while(it != _sketch_array.end()){
270
- *it += *other_it ;
271
- ++it ;
272
- ++other_it ;
248
+ auto it = _sketch_array.begin(); // This is a std::vector iterator.
249
+ auto other_it = other_sketch.begin(); //This is a const iterator over the other sketch.
250
+ while (it != _sketch_array.end()) {
251
+ *it += *other_it;
252
+ ++it;
253
+ ++other_it;
273
254
  }
274
- _total_weight += other_sketch.get_total_weight() ;
255
+ _total_weight += other_sketch.get_total_weight();
275
256
  }
276
257
 
277
258
  // Iterators
@@ -291,35 +272,34 @@ void count_min_sketch<W,A>::serialize(std::ostream& os) const {
291
272
  //const uint8_t preamble_longs = is_empty() ? PREAMBLE_LONGS_SHORT : PREAMBLE_LONGS_FULL;
292
273
  const uint8_t preamble_longs = PREAMBLE_LONGS_SHORT;
293
274
  const uint8_t ser_ver = SERIAL_VERSION_1;
294
- const uint8_t family_id = FAMILY_ID ;
275
+ const uint8_t family_id = FAMILY_ID;
295
276
  const uint8_t flags_byte = (is_empty() ? 1 << flags::IS_EMPTY : 0);
296
- const uint32_t unused32 = NULL_32 ;
297
- write(os, preamble_longs) ;
298
- write(os, ser_ver) ;
299
- write(os, family_id) ;
300
- write(os, flags_byte) ;
301
- write(os, unused32) ;
277
+ const uint32_t unused32 = NULL_32;
278
+ write(os, preamble_longs);
279
+ write(os, ser_ver);
280
+ write(os, family_id);
281
+ write(os, flags_byte);
282
+ write(os, unused32);
302
283
 
303
284
  // Long 1
304
- const uint32_t nbuckets = _num_buckets ;
305
- const uint8_t nhashes = _num_hashes ;
285
+ const uint32_t nbuckets = _num_buckets;
286
+ const uint8_t nhashes = _num_hashes;
306
287
  const uint16_t seed_hash(compute_seed_hash(_seed));
307
288
  const uint8_t unused8 = NULL_8;
308
- write(os, nbuckets) ;
309
- write(os, nhashes) ;
310
- write(os, seed_hash) ;
311
- write(os, unused8) ;
312
- if (is_empty()) return ; // sketch is empty, no need to write further bytes.
289
+ write(os, nbuckets);
290
+ write(os, nhashes);
291
+ write(os, seed_hash);
292
+ write(os, unused8);
293
+ if (is_empty()) return; // sketch is empty, no need to write further bytes.
313
294
 
314
295
  // Long 2
315
- const W t_weight = _total_weight ;
316
- write(os, t_weight) ;
296
+ write(os, _total_weight);
317
297
 
318
- // Long 3 onwards: remaining bytes are consumed by writing the weight and the array values.
319
- auto it = _sketch_array.begin() ;
320
- while(it != _sketch_array.end()){
321
- write(os, *it) ;
322
- ++it ;
298
+ // Long 3 onwards: remaining bytes are consumed by writing the weight and the array values.
299
+ auto it = _sketch_array.begin();
300
+ while (it != _sketch_array.end()) {
301
+ write(os, *it);
302
+ ++it;
323
303
  }
324
304
  }
325
305
 
@@ -327,40 +307,40 @@ template<typename W, typename A>
327
307
  auto count_min_sketch<W,A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) -> count_min_sketch {
328
308
 
329
309
  // First 8 bytes are 4 bytes of preamble and 4 unused bytes.
330
- const auto preamble_longs = read<uint8_t>(is) ;
331
- const auto serial_version = read<uint8_t>(is) ;
332
- const auto family_id = read<uint8_t>(is) ;
333
- const auto flags_byte = read<uint8_t>(is) ;
334
- read<uint32_t>(is) ; // 4 unused bytes
310
+ const auto preamble_longs = read<uint8_t>(is);
311
+ const auto serial_version = read<uint8_t>(is);
312
+ const auto family_id = read<uint8_t>(is);
313
+ const auto flags_byte = read<uint8_t>(is);
314
+ read<uint32_t>(is); // 4 unused bytes
335
315
 
336
316
  check_header_validity(preamble_longs, serial_version, family_id, flags_byte);
337
317
 
338
318
  // Sketch parameters
339
- const auto nbuckets = read<uint32_t>(is) ;
319
+ const auto nbuckets = read<uint32_t>(is);
340
320
  const auto nhashes = read<uint8_t>(is);
341
- const auto seed_hash = read<uint16_t>(is) ;
342
- read<uint8_t>(is) ; // 1 unused byte
321
+ const auto seed_hash = read<uint16_t>(is);
322
+ read<uint8_t>(is); // 1 unused byte
343
323
 
344
324
  if (seed_hash != compute_seed_hash(seed)) {
345
325
  throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
346
326
  + std::to_string(compute_seed_hash(seed)));
347
327
  }
348
- count_min_sketch c(nhashes, nbuckets, seed, allocator) ;
328
+ count_min_sketch c(nhashes, nbuckets, seed, allocator);
349
329
  const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
350
- if (is_empty == 1) return c ; // sketch is empty, no need to read further.
330
+ if (is_empty == 1) return c; // sketch is empty, no need to read further.
351
331
 
352
332
  // Set the sketch weight and read in the sketch values
353
- const auto weight = read<W>(is) ;
354
- c._total_weight += weight ;
333
+ const auto weight = read<W>(is);
334
+ c._total_weight += weight;
355
335
  read(is, c._sketch_array.data(), sizeof(W) * c._sketch_array.size());
356
336
 
357
- return c ;
337
+ return c;
358
338
  }
359
339
 
360
340
  template<typename W, typename A>
361
341
  size_t count_min_sketch<W,A>::get_serialized_size_bytes() const {
362
342
  // The header is always 2 longs, whether empty or full
363
- size_t preamble_longs = PREAMBLE_LONGS_SHORT;
343
+ const size_t preamble_longs = PREAMBLE_LONGS_SHORT;
364
344
 
365
345
  // If the sketch is empty, we're done. Otherwise, we need the total weight
366
346
  // held by the sketch as well as a data table of size (num_buckets * num_hashes)
@@ -377,33 +357,33 @@ auto count_min_sketch<W,A>::serialize(unsigned header_size_bytes) const -> vecto
377
357
  ptr += copy_to_mem(preamble_longs, ptr);
378
358
  const uint8_t ser_ver = SERIAL_VERSION_1;
379
359
  ptr += copy_to_mem(ser_ver, ptr);
380
- const uint8_t family_id = FAMILY_ID ;
360
+ const uint8_t family_id = FAMILY_ID;
381
361
  ptr += copy_to_mem(family_id, ptr);
382
362
  const uint8_t flags_byte = (is_empty() ? 1 << flags::IS_EMPTY : 0);
383
363
  ptr += copy_to_mem(flags_byte, ptr);
384
- const uint32_t unused32 = NULL_32 ;
385
- ptr += copy_to_mem(unused32, ptr) ;
364
+ const uint32_t unused32 = NULL_32;
365
+ ptr += copy_to_mem(unused32, ptr);
386
366
 
387
367
  // Long 1
388
- const uint32_t nbuckets = _num_buckets ;
389
- const uint8_t nhashes = _num_hashes ;
368
+ const uint32_t nbuckets = _num_buckets;
369
+ const uint8_t nhashes = _num_hashes;
390
370
  const uint16_t seed_hash(compute_seed_hash(_seed));
391
371
  const uint8_t null_characters_8 = NULL_8;
392
- ptr += copy_to_mem(nbuckets, ptr) ;
393
- ptr += copy_to_mem(nhashes, ptr) ;
394
- ptr += copy_to_mem(seed_hash, ptr) ;
395
- ptr += copy_to_mem(null_characters_8, ptr) ;
396
- if (is_empty()) return bytes ; // sketch is empty, no need to write further bytes.
372
+ ptr += copy_to_mem(nbuckets, ptr);
373
+ ptr += copy_to_mem(nhashes, ptr);
374
+ ptr += copy_to_mem(seed_hash, ptr);
375
+ ptr += copy_to_mem(null_characters_8, ptr);
376
+ if (is_empty()) return bytes; // sketch is empty, no need to write further bytes.
397
377
 
398
378
  // Long 2
399
- const W t_weight = _total_weight ;
400
- ptr += copy_to_mem(t_weight, ptr) ;
379
+ const W t_weight = _total_weight;
380
+ ptr += copy_to_mem(t_weight, ptr);
401
381
 
402
382
  // Long 3 onwards: remaining bytes are consumed by writing the weight and the array values.
403
- auto it = _sketch_array.begin() ;
404
- while(it != _sketch_array.end()){
405
- ptr += copy_to_mem(*it, ptr) ;
406
- ++it ;
383
+ auto it = _sketch_array.begin();
384
+ while (it != _sketch_array.end()) {
385
+ ptr += copy_to_mem(*it, ptr);
386
+ ++it;
407
387
  }
408
388
 
409
389
  return bytes;
@@ -416,45 +396,45 @@ auto count_min_sketch<W,A>::deserialize(const void* bytes, size_t size, uint64_t
416
396
  const char* ptr = static_cast<const char*>(bytes);
417
397
 
418
398
  // First 8 bytes are 4 bytes of preamble and 4 unused bytes.
419
- uint8_t preamble_longs ;
420
- ptr += copy_from_mem(ptr, preamble_longs) ;
421
- uint8_t serial_version ;
422
- ptr += copy_from_mem(ptr, serial_version) ;
423
- uint8_t family_id ;
424
- ptr += copy_from_mem(ptr, family_id) ;
425
- uint8_t flags_byte ;
426
- ptr += copy_from_mem(ptr, flags_byte) ;
399
+ uint8_t preamble_longs;
400
+ ptr += copy_from_mem(ptr, preamble_longs);
401
+ uint8_t serial_version;
402
+ ptr += copy_from_mem(ptr, serial_version);
403
+ uint8_t family_id;
404
+ ptr += copy_from_mem(ptr, family_id);
405
+ uint8_t flags_byte;
406
+ ptr += copy_from_mem(ptr, flags_byte);
427
407
  ptr += sizeof(uint32_t);
428
408
 
429
409
  check_header_validity(preamble_longs, serial_version, family_id, flags_byte);
430
410
 
431
411
  // Second 8 bytes are the sketch parameters with a final, unused byte.
432
- uint32_t nbuckets ;
433
- uint8_t nhashes ;
434
- uint16_t seed_hash ;
435
- ptr += copy_from_mem(ptr, nbuckets) ;
436
- ptr += copy_from_mem(ptr, nhashes) ;
437
- ptr += copy_from_mem(ptr, seed_hash) ;
412
+ uint32_t nbuckets;
413
+ uint8_t nhashes;
414
+ uint16_t seed_hash;
415
+ ptr += copy_from_mem(ptr, nbuckets);
416
+ ptr += copy_from_mem(ptr, nhashes);
417
+ ptr += copy_from_mem(ptr, seed_hash);
438
418
  ptr += sizeof(uint8_t);
439
419
 
440
420
  if (seed_hash != compute_seed_hash(seed)) {
441
421
  throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
442
422
  + std::to_string(compute_seed_hash(seed)));
443
423
  }
444
- count_min_sketch c(nhashes, nbuckets, seed, allocator) ;
424
+ count_min_sketch c(nhashes, nbuckets, seed, allocator);
445
425
  const bool is_empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
446
- if (is_empty) return c ; // sketch is empty, no need to read further.
426
+ if (is_empty) return c; // sketch is empty, no need to read further.
447
427
 
448
428
  ensure_minimum_memory(size, sizeof(W) * (1 + nbuckets * nhashes));
449
429
 
450
430
  // Long 2 is the weight.
451
431
  W weight;
452
- ptr += copy_from_mem(ptr, weight) ;
453
- c._total_weight += weight ;
432
+ ptr += copy_from_mem(ptr, weight);
433
+ c._total_weight += weight;
454
434
 
455
435
  // All remaining bytes are the sketch table entries.
456
- for (size_t i = 0; i<c._num_buckets*c._num_hashes ; ++i){
457
- ptr += copy_from_mem(ptr, c._sketch_array[i]) ;
436
+ for (size_t i = 0; i<c._num_buckets*c._num_hashes; ++i) {
437
+ ptr += copy_from_mem(ptr, c._sketch_array[i]);
458
438
  }
459
439
  return c;
460
440
  }
@@ -468,7 +448,7 @@ template<typename W, typename A>
468
448
  string<A> count_min_sketch<W,A>::to_string() const {
469
449
  // count the number of used entries in the sketch
470
450
  uint64_t num_nonzero = 0;
471
- for (auto entry : _sketch_array) {
451
+ for (const auto entry: _sketch_array) {
472
452
  if (entry != static_cast<W>(0.0))
473
453
  ++num_nonzero;
474
454
  }
@@ -497,7 +477,7 @@ void count_min_sketch<W,A>::check_header_validity(uint8_t preamble_longs, uint8_
497
477
  switch (sw) { // exhaustive list and description of all valid cases
498
478
  case 138 : break; // !empty, ser_ver==1, family==18, preLongs=2;
499
479
  case 139 : break; // empty, ser_ver==1, family==18, preLongs=2;
500
- //case 170 : break ; // !empty, ser_ver==1, family==18, preLongs=3 ;
480
+ //case 170 : break; // !empty, ser_ver==1, family==18, preLongs=3;
501
481
  default : // all other case values are invalid
502
482
  valid = false;
503
483
  }
@@ -507,7 +487,7 @@ void count_min_sketch<W,A>::check_header_validity(uint8_t preamble_longs, uint8_
507
487
  os << "Possible sketch corruption. Inconsistent state: "
508
488
  << "preamble_longs = " << static_cast<uint32_t>(preamble_longs)
509
489
  << ", empty = " << (empty ? "true" : "false")
510
- << ", serialization_version = " << static_cast<uint32_t>(serial_version) ;
490
+ << ", serialization_version = " << static_cast<uint32_t>(serial_version);
511
491
  throw std::invalid_argument(os.str());
512
492
  }
513
493
  }
@@ -20,24 +20,23 @@ add_executable(count_min_test)
20
20
  target_link_libraries(count_min_test count common_test_lib)
21
21
 
22
22
  set_target_properties(count_min_test PROPERTIES
23
- CXX_STANDARD 11
24
- CXX_STANDARD_REQUIRED YES
25
- )
23
+ CXX_STANDARD_REQUIRED YES
24
+ )
26
25
 
27
26
  file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" COUNT_TEST_BINARY_PATH)
28
27
  string(APPEND COUNT_TEST_BINARY_PATH "/")
29
28
  target_compile_definitions(count_min_test
30
- PRIVATE
31
- TEST_BINARY_INPUT_PATH="${COUNT_TEST_BINARY_PATH}"
32
- )
29
+ PRIVATE
30
+ TEST_BINARY_INPUT_PATH="${COUNT_TEST_BINARY_PATH}"
31
+ )
33
32
 
34
33
  add_test(
35
- NAME count_min_test
36
- COMMAND count_min_test
34
+ NAME count_min_test
35
+ COMMAND count_min_test
37
36
  )
38
37
 
39
38
  target_sources(count_min_test
40
- PRIVATE
41
- count_min_test.cpp
42
- count_min_allocation_test.cpp
43
- )
39
+ PRIVATE
40
+ count_min_test.cpp
41
+ count_min_allocation_test.cpp
42
+ )