datasketches 0.3.2 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (237) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -4
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +539 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -29,6 +29,7 @@
29
29
  namespace datasketches {
30
30
 
31
31
  /**
32
+ * Bounds on ratios in sampled sets.
32
33
  * This class is used to compute the bounds on the estimate of the ratio <i>|B| / |A|</i>, where:
33
34
  * <ul>
34
35
  * <li><i>|A|</i> is the unknown size of a set <i>A</i> of unique identifiers.</li>
@@ -28,6 +28,7 @@
28
28
  namespace datasketches {
29
29
 
30
30
  /**
31
+ * Bounds on ratios in Theta sketched sets.
31
32
  * This is to compute the bounds on the estimate of the ratio <i>B / A</i>, where:
32
33
  * <ul>
33
34
  * <li><i>A</i> is a Theta Sketch of population <i>PopA</i>.</li>
@@ -50,8 +51,8 @@ class bounds_on_ratios_in_theta_sketched_sets {
50
51
  public:
51
52
  /**
52
53
  * Gets the approximate lower bound for B over A based on a 95% confidence interval
53
- * @param sketchA the sketch A
54
- * @param sketchB the sketch B
54
+ * @param sketch_a the sketch A
55
+ * @param sketch_b the sketch B
55
56
  * @return the approximate lower bound for B over A
56
57
  */
57
58
  template<typename SketchA, typename SketchB>
@@ -72,8 +73,8 @@ public:
72
73
 
73
74
  /**
74
75
  * Gets the approximate upper bound for B over A based on a 95% confidence interval
75
- * @param sketchA the sketch A
76
- * @param sketchB the sketch B
76
+ * @param sketch_a the sketch A
77
+ * @param sketch_b the sketch B
77
78
  * @return the approximate upper bound for B over A
78
79
  */
79
80
  template<typename SketchA, typename SketchB>
@@ -94,8 +95,8 @@ public:
94
95
 
95
96
  /**
96
97
  * Gets the estimate for B over A
97
- * @param sketchA the sketch A
98
- * @param sketchB the sketch B
98
+ * @param sketch_a the sketch A
99
+ * @param sketch_b the sketch B
99
100
  * @return the estimate for B over A
100
101
  */
101
102
  template<typename SketchA, typename SketchB>
@@ -25,6 +25,16 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
+ // forward declaration
29
+ template<typename A> class theta_a_not_b_alloc;
30
+
31
+ // alias with default allocator for convenience
32
+ using theta_a_not_b = theta_a_not_b_alloc<std::allocator<uint64_t>>;
33
+
34
+ /**
35
+ * Theta A-not-B (set difference).
36
+ * Computes set difference of Theta sketches.
37
+ */
28
38
  template<typename Allocator = std::allocator<uint64_t>>
29
39
  class theta_a_not_b_alloc {
30
40
  public:
@@ -33,11 +43,19 @@ public:
33
43
  using CompactSketch = compact_theta_sketch_alloc<Allocator>;
34
44
  using State = theta_set_difference_base<Entry, ExtractKey, CompactSketch, Allocator>;
35
45
 
46
+ /**
47
+ * Constructor
48
+ * @param seed for the hash function that was used to create the sketch
49
+ * @param allocator to use for allocating and deallocating memory
50
+ */
36
51
  explicit theta_a_not_b_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
37
52
 
38
53
  /**
39
- * Computes the a-not-b set operation given two sketches.
40
- * @return the result of a-not-b
54
+ * Computes the A-not-B set operation given two sketches.
55
+ * @param a sketch A
56
+ * @param b sketch B
57
+ * @param ordered optional flag to specify if an ordered sketch should be produced
58
+ * @return the result of A-not-B as a compact sketch
41
59
  */
42
60
  template<typename FwdSketch, typename Sketch>
43
61
  CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const;
@@ -46,9 +64,6 @@ private:
46
64
  State state_;
47
65
  };
48
66
 
49
- // alias with default allocator for convenience
50
- using theta_a_not_b = theta_a_not_b_alloc<std::allocator<uint64_t>>;
51
-
52
67
  } /* namespace datasketches */
53
68
 
54
69
  #include "theta_a_not_b_impl.hpp"
@@ -25,15 +25,21 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
+ /// Theta constants
28
29
  namespace theta_constants {
30
+ /// hash table resize factor
29
31
  using resize_factor = datasketches::resize_factor;
30
- //enum resize_factor { X1, X2, X4, X8 };
31
- const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
32
+ /// default resize factor
33
+ const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
34
+
35
+ /// max theta - signed max for compatibility with Java
36
+ const uint64_t MAX_THETA = LLONG_MAX;
37
+ /// min log2 of K
32
38
  const uint8_t MIN_LG_K = 5;
39
+ /// max log2 of K
33
40
  const uint8_t MAX_LG_K = 26;
34
-
41
+ /// default log2 of K
35
42
  const uint8_t DEFAULT_LG_K = 12;
36
- const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
37
43
  }
38
44
 
39
45
  } /* namespace datasketches */
@@ -57,7 +57,7 @@ public:
57
57
  // consistent way of initializing theta from p
58
58
  // avoids multiplication if p == 1 since it might not yield MAX_THETA exactly
59
59
  static uint64_t starting_theta_from_p(float p) {
60
- if (p < 1) return static_cast<float>(theta_constants::MAX_THETA) * p;
60
+ if (p < 1) return static_cast<uint64_t>(theta_constants::MAX_THETA * p);
61
61
  return theta_constants::MAX_THETA;
62
62
  }
63
63
 
@@ -25,6 +25,16 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
+ // forward declaration
29
+ template<typename A> class theta_intersection_alloc;
30
+
31
+ // alias with default allocator for convenience
32
+ using theta_intersection = theta_intersection_alloc<std::allocator<uint64_t>>;
33
+
34
+ /**
35
+ * Theta intersection.
36
+ * Computes intersection of Theta sketches.
37
+ */
28
38
  template<typename Allocator = std::allocator<uint64_t>>
29
39
  class theta_intersection_alloc {
30
40
  public:
@@ -33,6 +43,7 @@ public:
33
43
  using Sketch = theta_sketch_alloc<Allocator>;
34
44
  using CompactSketch = compact_theta_sketch_alloc<Allocator>;
35
45
 
46
+ // there is no payload in Theta sketch entry
36
47
  struct nop_policy {
37
48
  void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
38
49
  unused(incoming_entry);
@@ -41,7 +52,7 @@ public:
41
52
  };
42
53
  using State = theta_intersection_base<Entry, ExtractKey, nop_policy, Sketch, CompactSketch, Allocator>;
43
54
 
44
- /*
55
+ /**
45
56
  * Constructor
46
57
  * @param seed for the hash function that was used to create the sketch
47
58
  * @param allocator to use for allocating and deallocating memory
@@ -61,7 +72,7 @@ public:
61
72
  * Produces a copy of the current state of the intersection.
62
73
  * If update() was not called, the state is the infinite "universe",
63
74
  * which is considered an undefined state, and throws an exception.
64
- * @param ordered optional flag to specify if ordered sketch should be produced
75
+ * @param ordered optional flag to specify if an ordered sketch should be produced
65
76
  * @return the result of the intersection
66
77
  */
67
78
  CompactSketch get_result(bool ordered = true) const;
@@ -76,9 +87,6 @@ private:
76
87
  State state_;
77
88
  };
78
89
 
79
- // alias with default allocator for convenience
80
- using theta_intersection = theta_intersection_alloc<std::allocator<uint64_t>>;
81
-
82
90
  } /* namespace datasketches */
83
91
 
84
92
  #include "theta_intersection_impl.hpp"
@@ -49,8 +49,8 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
49
49
  if (!is_valid_) { // first update, copy or move incoming sketch
50
50
  is_valid_ = true;
51
51
  const uint8_t lg_size = lg_size_from_count(sketch.get_num_retained(), theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
52
- table_ = hash_table(lg_size, lg_size, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
53
- for (auto& entry: sketch) {
52
+ table_ = hash_table(lg_size, lg_size - 1, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
53
+ for (auto&& entry: sketch) {
54
54
  auto result = table_.find(EK()(entry));
55
55
  if (result.second) {
56
56
  throw std::invalid_argument("duplicate key, possibly corrupted input sketch");
@@ -64,7 +64,7 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
64
64
  matched_entries.reserve(max_matches);
65
65
  uint32_t match_count = 0;
66
66
  uint32_t count = 0;
67
- for (auto& entry: sketch) {
67
+ for (auto&& entry: sketch) {
68
68
  if (EK()(entry) < table_.theta_) {
69
69
  auto result = table_.find(EK()(entry));
70
70
  if (result.second) {
@@ -88,8 +88,8 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
88
88
  if (table_.theta_ == theta_constants::MAX_THETA) table_.is_empty_ = true;
89
89
  } else {
90
90
  const uint8_t lg_size = lg_size_from_count(match_count, theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
91
- table_ = hash_table(lg_size, lg_size, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
92
- for (uint32_t i = 0; i < match_count; i++) {
91
+ table_ = hash_table(lg_size, lg_size - 1, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
92
+ for (uint32_t i = 0; i < match_count; ++i) {
93
93
  auto result = table_.find(EK()(matched_entries[i]));
94
94
  table_.insert(result.first, std::move(matched_entries[i]));
95
95
  }
@@ -28,9 +28,9 @@ state_(seed, nop_policy(), allocator)
28
28
  {}
29
29
 
30
30
  template<typename A>
31
- template<typename SS>
32
- void theta_intersection_alloc<A>::update(SS&& sketch) {
33
- state_.update(std::forward<SS>(sketch));
31
+ template<typename FwdSketch>
32
+ void theta_intersection_alloc<A>::update(FwdSketch&& sketch) {
33
+ state_.update(std::forward<FwdSketch>(sketch));
34
34
  }
35
35
 
36
36
  template<typename A>
@@ -26,10 +26,11 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
+ /// Theta Jaccard similarity alias
29
30
  template<typename Allocator = std::allocator<uint64_t>>
30
31
  using theta_jaccard_similarity_alloc = jaccard_similarity_base<theta_union_alloc<Allocator>, theta_intersection_alloc<Allocator>, trivial_extract_key>;
31
32
 
32
- // alias with default allocator for convenience
33
+ /// Theta Jaccard similarity alias with default allocator
33
34
  using theta_jaccard_similarity = theta_jaccard_similarity_alloc<std::allocator<uint64_t>>;
34
35
 
35
36
  } /* namespace datasketches */
@@ -30,6 +30,7 @@
30
30
 
31
31
  namespace datasketches {
32
32
 
33
+ /// Base class for Jaccard similarity
33
34
  template<typename Union, typename Intersection, typename ExtractKey>
34
35
  class jaccard_similarity_base {
35
36
  public:
@@ -65,7 +65,7 @@ CS theta_set_difference_base<EN, EK, CS, A>::compute(FwdSketch&& a, const Sketch
65
65
  }
66
66
 
67
67
  // scan A lookup B
68
- for (auto& entry: a) {
68
+ for (auto&& entry: a) {
69
69
  const uint64_t hash = EK()(entry);
70
70
  if (hash < theta) {
71
71
  auto result = table.find(hash);
@@ -25,6 +25,22 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
+ // forward declarations
29
+ template<typename A> class theta_sketch_alloc;
30
+ template<typename A> class update_theta_sketch_alloc;
31
+ template<typename A> class compact_theta_sketch_alloc;
32
+ template<typename A> class wrapped_compact_theta_sketch_alloc;
33
+
34
+ /// Theta sketch alias with default allocator
35
+ using theta_sketch = theta_sketch_alloc<std::allocator<uint64_t>>;
36
+ /// Update Theta sketch alias with default allocator
37
+ using update_theta_sketch = update_theta_sketch_alloc<std::allocator<uint64_t>>;
38
+ /// Compact Theta sketch alias with default allocator
39
+ using compact_theta_sketch = compact_theta_sketch_alloc<std::allocator<uint64_t>>;
40
+ /// Wrapped Compact Theta sketch alias with default allocator
41
+ using wrapped_compact_theta_sketch = wrapped_compact_theta_sketch_alloc<std::allocator<uint64_t>>;
42
+
43
+ /// Abstract base class for Theta sketch
28
44
  template<typename Allocator = std::allocator<uint64_t>>
29
45
  class base_theta_sketch_alloc {
30
46
  public:
@@ -106,6 +122,7 @@ protected:
106
122
  virtual void print_items(std::ostringstream& os) const = 0;
107
123
  };
108
124
 
125
+ /// Base class for the Theta Sketch, a generalization of the Kth Minimum Value (KMV) sketch.
109
126
  template<typename Allocator = std::allocator<uint64_t>>
110
127
  class theta_sketch_alloc: public base_theta_sketch_alloc<Allocator> {
111
128
  public:
@@ -149,6 +166,11 @@ protected:
149
166
  // forward declaration
150
167
  template<typename A> class compact_theta_sketch_alloc;
151
168
 
169
+ /**
170
+ * Update Theta sketch.
171
+ * The purpose of this class is to build a Theta sketch from input data via the update() methods.
172
+ * There is no constructor. Use builder instead.
173
+ */
152
174
  template<typename Allocator = std::allocator<uint64_t>>
153
175
  class update_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
154
176
  public:
@@ -163,11 +185,33 @@ public:
163
185
  // No constructor here. Use builder instead.
164
186
  class builder;
165
187
 
166
- update_theta_sketch_alloc(const update_theta_sketch_alloc&) = default;
167
- update_theta_sketch_alloc(update_theta_sketch_alloc&&) noexcept = default;
188
+ /**
189
+ * Copy constructor
190
+ * @param other sketch to be copied
191
+ */
192
+ update_theta_sketch_alloc(const update_theta_sketch_alloc& other) = default;
193
+
194
+ /**
195
+ * Move constructor
196
+ * @param other sketch to be moved
197
+ */
198
+ update_theta_sketch_alloc(update_theta_sketch_alloc&& other) noexcept = default;
199
+
168
200
  virtual ~update_theta_sketch_alloc() = default;
169
- update_theta_sketch_alloc& operator=(const update_theta_sketch_alloc&) = default;
170
- update_theta_sketch_alloc& operator=(update_theta_sketch_alloc&&) = default;
201
+
202
+ /**
203
+ * Copy assignment
204
+ * @param other sketch to be copied
205
+ * @return reference to this sketch
206
+ */
207
+ update_theta_sketch_alloc& operator=(const update_theta_sketch_alloc& other) = default;
208
+
209
+ /**
210
+ * Move assignment
211
+ * @param other sketch to be moved
212
+ * @return reference to this sketch
213
+ */
214
+ update_theta_sketch_alloc& operator=(update_theta_sketch_alloc&& other) = default;
171
215
 
172
216
  virtual Allocator get_allocator() const;
173
217
  virtual bool is_empty() const;
@@ -287,7 +331,7 @@ public:
287
331
 
288
332
  /**
289
333
  * Converts this sketch to a compact sketch (ordered or unordered).
290
- * @param ordered optional flag to specify if ordered sketch should be produced
334
+ * @param ordered optional flag to specify if an ordered sketch should be produced
291
335
  * @return compact sketch
292
336
  */
293
337
  compact_theta_sketch_alloc<Allocator> compact(bool ordered = true) const;
@@ -307,8 +351,10 @@ private:
307
351
  virtual void print_specifics(std::ostringstream& os) const;
308
352
  };
309
353
 
310
- // compact sketch
311
-
354
+ /**
355
+ * Compact Theta sketch.
356
+ * This is an immutable form of the Theta sketch, the form that can be serialized and deserialized.
357
+ */
312
358
  template<typename Allocator = std::allocator<uint64_t>>
313
359
  class compact_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
314
360
  public:
@@ -327,13 +373,42 @@ public:
327
373
  // - as a result of a set operation
328
374
  // - by deserializing a previously serialized compact sketch
329
375
 
376
+ /**
377
+ * Converting constructor.
378
+ * Constructs a compact sketch from any other type of Theta sketch
379
+ * @param other sketch to be constructed from
380
+ * @param ordered if true make the resulting sketch ordered
381
+ */
330
382
  template<typename Other>
331
383
  compact_theta_sketch_alloc(const Other& other, bool ordered);
332
- compact_theta_sketch_alloc(const compact_theta_sketch_alloc&) = default;
333
- compact_theta_sketch_alloc(compact_theta_sketch_alloc&&) noexcept = default;
384
+
385
+ /**
386
+ * Copy constructor
387
+ * @param other sketch to be copied
388
+ */
389
+ compact_theta_sketch_alloc(const compact_theta_sketch_alloc& other) = default;
390
+
391
+ /**
392
+ * Move constructor
393
+ * @param other sketch to be moved
394
+ */
395
+ compact_theta_sketch_alloc(compact_theta_sketch_alloc&& other) noexcept = default;
396
+
334
397
  virtual ~compact_theta_sketch_alloc() = default;
335
- compact_theta_sketch_alloc& operator=(const compact_theta_sketch_alloc&) = default;
336
- compact_theta_sketch_alloc& operator=(compact_theta_sketch_alloc&&) = default;
398
+
399
+ /**
400
+ * Copy assignment
401
+ * @param other sketch to be copied
402
+ * @return reference to this sketch
403
+ */
404
+ compact_theta_sketch_alloc& operator=(const compact_theta_sketch_alloc& other) = default;
405
+
406
+ /**
407
+ * Move assignment
408
+ * @param other sketch to be moved
409
+ * @return reference to this sketch
410
+ */
411
+ compact_theta_sketch_alloc& operator=(compact_theta_sketch_alloc&& other) = default;
337
412
 
338
413
  virtual Allocator get_allocator() const;
339
414
  virtual bool is_empty() const;
@@ -385,6 +460,7 @@ public:
385
460
  * This method deserializes a sketch from a given stream.
386
461
  * @param is input stream
387
462
  * @param seed the seed for the hash function that was used to create the sketch
463
+ * @param allocator instance of an Allocator
388
464
  * @return an instance of the sketch
389
465
  */
390
466
  static compact_theta_sketch_alloc deserialize(std::istream& is,
@@ -395,14 +471,12 @@ public:
395
471
  * @param bytes pointer to the array of bytes
396
472
  * @param size the size of the array
397
473
  * @param seed the seed for the hash function that was used to create the sketch
474
+ * @param allocator instance of an Allocator
398
475
  * @return an instance of the sketch
399
476
  */
400
477
  static compact_theta_sketch_alloc deserialize(const void* bytes, size_t size,
401
478
  uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
402
479
 
403
- // for internal use
404
- compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, Allocator>&& entries);
405
-
406
480
  private:
407
481
  enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
408
482
 
@@ -423,20 +497,33 @@ private:
423
497
  static compact_theta_sketch_alloc deserialize_v4(uint8_t preamble_longs, std::istream& is, uint64_t seed, const Allocator& allocator);
424
498
 
425
499
  virtual void print_specifics(std::ostringstream& os) const;
500
+
501
+ template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_union_base;
502
+ template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_intersection_base;
503
+ template<typename E, typename EK, typename CS, typename A> friend class theta_set_difference_base;
504
+ compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, Allocator>&& entries);
426
505
  };
427
506
 
507
+ /// Update Theta sketch builder
428
508
  template<typename Allocator>
429
509
  class update_theta_sketch_alloc<Allocator>::builder: public theta_base_builder<builder, Allocator> {
430
510
  public:
511
+ /**
512
+ * Constructor
513
+ * @param allocator instance of an Allocator
514
+ */
431
515
  builder(const Allocator& allocator = Allocator());
516
+ /// @return instance of Update Theta sketch
432
517
  update_theta_sketch_alloc build() const;
433
518
  };
434
519
 
435
- // This is to wrap a buffer containing a serialized compact sketch and use it in a set operation avoiding some cost of deserialization.
436
- // It does not take the ownership of the buffer.
437
-
520
+ /**
521
+ * Wrapped Compact Theta sketch.
522
+ * This is to wrap a buffer containing a serialized compact sketch and use it in a set operation avoiding some cost of deserialization.
523
+ * It does not take the ownership of the buffer.
524
+ */
438
525
  template<typename Allocator = std::allocator<uint64_t>>
439
- class wrapped_compact_theta_sketch_alloc : public base_theta_sketch_alloc<Allocator> {
526
+ class wrapped_compact_theta_sketch_alloc: public base_theta_sketch_alloc<Allocator> {
440
527
  public:
441
528
  class const_iterator;
442
529
 
@@ -447,7 +534,17 @@ public:
447
534
  uint32_t get_num_retained() const;
448
535
  uint16_t get_seed_hash() const;
449
536
 
537
+ /**
538
+ * Const iterator over hash values in this sketch.
539
+ * @return begin iterator
540
+ */
450
541
  const_iterator begin() const;
542
+
543
+ /**
544
+ * Const iterator pointing past the valid range.
545
+ * Not to be incremented or dereferenced.
546
+ * @return end iterator
547
+ */
451
548
  const_iterator end() const;
452
549
 
453
550
  /**
@@ -455,6 +552,7 @@ public:
455
552
  * @param bytes pointer to the array of bytes
456
553
  * @param size the size of the array
457
554
  * @param seed the seed for the hash function that was used to create the sketch
555
+ * @param dump_on_error if true prints hex dump of the input
458
556
  * @return an instance of the sketch
459
557
  */
460
558
  static const wrapped_compact_theta_sketch_alloc wrap(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, bool dump_on_error = false);
@@ -471,15 +569,22 @@ private:
471
569
  };
472
570
 
473
571
  template<typename Allocator>
474
- class wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator: public std::iterator<std::input_iterator_tag, uint64_t> {
572
+ class wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator {
475
573
  public:
574
+ using iterator_category = std::input_iterator_tag;
575
+ using value_type = const uint64_t;
576
+ using difference_type = void;
577
+ using pointer = value_type*;
578
+ using reference = uint64_t;
579
+
476
580
  const_iterator(const void* ptr, uint8_t entry_bits, uint32_t num_entries, uint32_t index);
477
581
  const_iterator& operator++();
478
582
  const_iterator operator++(int);
479
583
  bool operator==(const const_iterator& other) const;
480
584
  bool operator!=(const const_iterator& other) const;
481
- const uint64_t& operator*() const;
482
- const uint64_t* operator->() const;
585
+ reference operator*() const;
586
+ pointer operator->() const;
587
+
483
588
  private:
484
589
  const void* ptr_;
485
590
  uint8_t entry_bits_;
@@ -492,12 +597,6 @@ private:
492
597
  uint64_t buffer_[8];
493
598
  };
494
599
 
495
- // aliases with default allocator for convenience
496
- using theta_sketch = theta_sketch_alloc<std::allocator<uint64_t>>;
497
- using update_theta_sketch = update_theta_sketch_alloc<std::allocator<uint64_t>>;
498
- using compact_theta_sketch = compact_theta_sketch_alloc<std::allocator<uint64_t>>;
499
- using wrapped_compact_theta_sketch = wrapped_compact_theta_sketch_alloc<std::allocator<uint64_t>>;
500
-
501
600
  } /* namespace datasketches */
502
601
 
503
602
  #include "theta_sketch_impl.hpp"
@@ -357,7 +357,7 @@ void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
357
357
  write(os, flags_byte);
358
358
  write(os, get_seed_hash());
359
359
  if (preamble_longs > 1) {
360
- write<uint32_t>(os, entries_.size());
360
+ write(os, static_cast<uint32_t>(entries_.size()));
361
361
  write<uint32_t>(os, 0); // unused
362
362
  }
363
363
  if (this->is_estimation_mode()) write(os, this->theta_);
@@ -385,7 +385,7 @@ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const
385
385
  *ptr++ = flags_byte;
386
386
  ptr += copy_to_mem(get_seed_hash(), ptr);
387
387
  if (preamble_longs > 1) {
388
- ptr += copy_to_mem<uint32_t>(entries_.size(), ptr);
388
+ ptr += copy_to_mem(static_cast<uint32_t>(entries_.size()), ptr);
389
389
  ptr += sizeof(uint32_t); // unused
390
390
  }
391
391
  if (this->is_estimation_mode()) ptr += copy_to_mem(theta_, ptr);
@@ -432,7 +432,7 @@ void compact_theta_sketch_alloc<A>::serialize_version_4(std::ostream& os) const
432
432
  const uint8_t entry_bits = 64 - compute_min_leading_zeros();
433
433
 
434
434
  // store num_entries as whole bytes since whole-byte blocks will follow (most probably)
435
- const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(entries_.size()));
435
+ const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(static_cast<uint32_t>(entries_.size())));
436
436
 
437
437
  write(os, preamble_longs);
438
438
  write(os, COMPRESSED_SERIAL_VERSION);
@@ -447,7 +447,7 @@ void compact_theta_sketch_alloc<A>::serialize_version_4(std::ostream& os) const
447
447
  write(os, flags_byte);
448
448
  write(os, get_seed_hash());
449
449
  if (this->is_estimation_mode()) write(os, this->theta_);
450
- uint32_t num_entries = entries_.size();
450
+ uint32_t num_entries = static_cast<uint32_t>(entries_.size());
451
451
  for (unsigned i = 0; i < num_entries_bytes; ++i) {
452
452
  write<uint8_t>(os, num_entries & 0xff);
453
453
  num_entries >>= 8;
@@ -488,7 +488,7 @@ auto compact_theta_sketch_alloc<A>::serialize_version_4(unsigned header_size_byt
488
488
  const size_t compressed_bits = entry_bits * entries_.size();
489
489
 
490
490
  // store num_entries as whole bytes since whole-byte blocks will follow (most probably)
491
- const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(entries_.size()));
491
+ const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(static_cast<uint32_t>(entries_.size())));
492
492
 
493
493
  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + num_entries_bytes
494
494
  + whole_bytes_to_hold_bits(compressed_bits);
@@ -510,7 +510,7 @@ auto compact_theta_sketch_alloc<A>::serialize_version_4(unsigned header_size_byt
510
510
  if (this->is_estimation_mode()) {
511
511
  ptr += copy_to_mem(theta_, ptr);
512
512
  }
513
- uint32_t num_entries = entries_.size();
513
+ uint32_t num_entries = static_cast<uint32_t>(entries_.size());
514
514
  for (unsigned i = 0; i < num_entries_bytes; ++i) {
515
515
  *ptr++ = num_entries & 0xff;
516
516
  num_entries >>= 8;
@@ -869,13 +869,13 @@ bool wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator==(c
869
869
  }
870
870
 
871
871
  template<typename Allocator>
872
- const uint64_t& wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator*() const {
872
+ auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator*() const -> reference {
873
873
  if (entry_bits_ == 64) return *reinterpret_cast<const uint64_t*>(ptr_);
874
874
  return buffer_[buf_i_];
875
875
  }
876
876
 
877
877
  template<typename Allocator>
878
- const uint64_t* wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator->() const {
878
+ auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator->() const -> pointer {
879
879
  if (entry_bits_ == 64) return reinterpret_cast<const uint64_t*>(ptr_);
880
880
  return buffer_ + buf_i_;
881
881
  }